From 62ef6d5e8a6320fff926d0344e7a8dba75e0c3fc Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 11 May 2023 18:21:09 -0400 Subject: [PATCH 1/4] BUG/CoW: Series.rename not making a lazy copy when passed a scalar --- doc/source/whatsnew/v2.0.2.rst | 1 + pandas/core/series.py | 14 ++++++++++---- pandas/tests/copy_view/test_methods.py | 2 ++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index 0d6647dc38b3c..8ef733f825d00 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -31,6 +31,7 @@ Bug fixes - Bug in :meth:`DataFrame.__getitem__` not preserving dtypes for :class:`MultiIndex` partial keys (:issue:`51895`) - Bug in :meth:`DataFrame.convert_dtypes` ignores ``convert_*`` keywords when set to False ``dtype_backend="pyarrow"`` (:issue:`52872`) - Bug in :meth:`Series.describe` treating pyarrow-backed timestamps and timedeltas as categorical data (:issue:`53001`) +- Bug in :meth:`Series.rename` not making a lazy copy when Copy-on-Write is enabled when a scalar is passed to it (:issue:`52450`) - Bug in :meth:`pd.array` raising for ``NumPy`` array and ``pa.large_string`` or ``pa.large_binary`` (:issue:`52590`) diff --git a/pandas/core/series.py b/pandas/core/series.py index 84fa874831d85..cfb8f79c50959 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1907,7 +1907,9 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: df = self._constructor_expanddim(mgr) return df.__finalize__(self, method="to_frame") - def _set_name(self, name, inplace: bool = False) -> Series: + def _set_name( + self, name, inplace: bool = False, deep: bool | None = None + ) -> Series: """ Set the Series name. @@ -1916,9 +1918,11 @@ def _set_name(self, name, inplace: bool = False) -> Series: name : str inplace : bool Whether to modify `self` directly or return a copy. + deep : bool|None, default None + Whether to do a deep copy, a shallow copy, or Copy on Write(None) """ inplace = validate_bool_kwarg(inplace, "inplace") - ser = self if inplace else self.copy() + ser = self if inplace else self.copy(deep) ser.name = name return ser @@ -4580,7 +4584,7 @@ def rename( index: Renamer | Hashable | None = None, *, axis: Axis | None = None, - copy: bool = True, + copy: bool | None = None, inplace: bool = False, level: Level | None = None, errors: IgnoreRaise = "ignore", @@ -4667,7 +4671,9 @@ def rename( errors=errors, ) else: - return self._set_name(index, inplace=inplace) + return self._set_name( + index, inplace=inplace, deep=copy and not using_copy_on_write() + ) @Appender( """ diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 68b41682ab411..67fc91e0567ef 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -141,6 +141,7 @@ def test_methods_copy_keyword( "method", [ lambda ser, copy: ser.rename(index={0: 100}, copy=copy), + lambda ser, copy: ser.rename(None, copy=copy), lambda ser, copy: ser.reindex(index=ser.index, copy=copy), lambda ser, copy: ser.reindex_like(ser, copy=copy), lambda ser, copy: ser.align(ser, copy=copy)[0], @@ -158,6 +159,7 @@ def test_methods_copy_keyword( lambda ser, copy: ser.set_flags(allows_duplicate_labels=False, copy=copy), ], ids=[ + "rename (dict)", "rename", "reindex", "reindex_like", From 9ec0eb212bdb80bfff7eb0949173bd5de82d6802 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Fri, 12 May 2023 07:10:45 -0400 Subject: [PATCH 2/4] fix test --- pandas/tests/frame/indexing/test_setitem.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index af3632bffe948..3edfd47cb05a1 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -316,7 +316,7 @@ def test_frame_setitem_existing_datetime64_col_other_units(self, unit): df["dates"] = vals assert (df["dates"].values == ex_vals).all() - def test_setitem_dt64tz(self, timezone_frame): + def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write): df = timezone_frame idx = df["B"].rename("foo") @@ -331,12 +331,16 @@ def test_setitem_dt64tz(self, timezone_frame): # assert that A & C are not sharing the same base (e.g. they # are copies) + # Note: This does not hold with Copy on Write (because of lazy copying) v1 = df._mgr.arrays[1] v2 = df._mgr.arrays[2] tm.assert_extension_array_equal(v1, v2) v1base = v1._ndarray.base v2base = v2._ndarray.base - assert v1base is None or (id(v1base) != id(v2base)) + if not using_copy_on_write: + assert v1base is None or (id(v1base) != id(v2base)) + else: + assert id(v1base) == id(v2base) # with nan df2 = df.copy() From 512a20d0e9f4250a479401135ad7c1d375173f6e Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 13 May 2023 20:20:05 -0400 Subject: [PATCH 3/4] Update series.py --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index cfb8f79c50959..dfa8f640068a2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1922,7 +1922,7 @@ def _set_name( Whether to do a deep copy, a shallow copy, or Copy on Write(None) """ inplace = validate_bool_kwarg(inplace, "inplace") - ser = self if inplace else self.copy(deep) + ser = self if inplace else self.copy(deep and not using_copy_on_write()) ser.name = name return ser @@ -4672,7 +4672,7 @@ def rename( ) else: return self._set_name( - index, inplace=inplace, deep=copy and not using_copy_on_write() + index, inplace=inplace, deep=copy ) @Appender( From 54d38f47c3bcc7f5ef96904b06155aef54975a62 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 13 May 2023 21:17:33 -0400 Subject: [PATCH 4/4] pre-commit --- pandas/core/series.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index dfa8f640068a2..e70b217e08dff 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4671,9 +4671,7 @@ def rename( errors=errors, ) else: - return self._set_name( - index, inplace=inplace, deep=copy - ) + return self._set_name(index, inplace=inplace, deep=copy) @Appender( """