From 9a923bf62127f33d83427af9793ec3e6df6dd10b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 9 Feb 2024 22:54:23 +0100 Subject: [PATCH 1/2] CoW: Remove remaining occurrences from CoW --- pandas/core/base.py | 6 ++---- pandas/core/indexes/api.py | 12 +----------- pandas/core/indexes/base.py | 15 ++++++--------- pandas/core/indexes/datetimelike.py | 7 ++----- pandas/core/indexing.py | 10 ++-------- pandas/core/reshape/concat.py | 19 ++----------------- pandas/io/parsers/readers.py | 4 +--- pandas/io/pytables.py | 10 ++-------- 8 files changed, 18 insertions(+), 65 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index a1484d9ad032b..495b1ff09ab06 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -18,8 +18,6 @@ import numpy as np -from pandas._config import using_copy_on_write - from pandas._libs import lib from pandas._typing import ( AxisInt, @@ -661,10 +659,10 @@ def to_numpy( result = np.asarray(values, dtype=dtype) - if (copy and not fillna) or (not copy and using_copy_on_write()): + if (copy and not fillna) or not copy: if np.shares_memory(self._values[:2], result[:2]): # Take slices to improve performance of check - if using_copy_on_write() and not copy: + if not copy: result = result.view() result.flags.writeable = False else: diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 560285bd57a22..d830bf8345be8 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -74,7 +74,6 @@ def get_objs_combined_axis( intersect: bool = False, axis: Axis = 0, sort: bool = True, - copy: bool = False, ) -> Index: """ Extract combined index: return intersection or union (depending on the @@ -92,15 +91,13 @@ def get_objs_combined_axis( The axis to extract indexes from. sort : bool, default True Whether the result index should come out sorted or not. - copy : bool, default False - If True, return a copy of the combined index. Returns ------- Index """ obs_idxes = [obj._get_axis(axis) for obj in objs] - return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy) + return _get_combined_index(obs_idxes, intersect=intersect, sort=sort) def _get_distinct_objs(objs: list[Index]) -> list[Index]: @@ -121,7 +118,6 @@ def _get_combined_index( indexes: list[Index], intersect: bool = False, sort: bool = False, - copy: bool = False, ) -> Index: """ Return the union or intersection of indexes. @@ -135,8 +131,6 @@ def _get_combined_index( calculate the union. sort : bool, default False Whether the result index should come out sorted or not. - copy : bool, default False - If True, return a copy of the combined index. Returns ------- @@ -158,10 +152,6 @@ def _get_combined_index( if sort: index = safe_sort_index(index) - # GH 29879 - if copy: - index = index.copy() - return index diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0bb52ceb6aa7f..96e4796e4d8d9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -22,7 +22,6 @@ from pandas._config import ( get_option, - using_copy_on_write, using_pyarrow_string_dtype, ) @@ -1663,7 +1662,7 @@ def to_frame( if name is lib.no_default: name = self._get_level_names() - result = DataFrame({name: self}, copy=not using_copy_on_write()) + result = DataFrame({name: self}, copy=False) if index: result.index = self @@ -4800,13 +4799,11 @@ def values(self) -> ArrayLike: [(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] Length: 5, dtype: interval[int64, right] """ - if using_copy_on_write(): - data = self._data - if isinstance(data, np.ndarray): - data = data.view() - data.flags.writeable = False - return data - return self._data + data = self._data + if isinstance(data, np.ndarray): + data = data.view() + data.flags.writeable = False + return data @cache_readonly @doc(IndexOpsMixin.array) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 45decaf97a188..ed5b35c92877f 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -18,8 +18,6 @@ import numpy as np -from pandas._config import using_copy_on_write - from pandas._libs import ( NaT, Timedelta, @@ -490,9 +488,8 @@ def _with_freq(self, freq): def values(self) -> np.ndarray: # NB: For Datetime64TZ this is lossy data = self._data._ndarray - if using_copy_on_write(): - data = data.view() - data.flags.writeable = False + data = data.view() + data.flags.writeable = False return data @doc(DatetimeIndexOpsMixin.shift) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4ccac6449d835..2ae3940d885ef 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -13,8 +13,6 @@ import numpy as np -from pandas._config import using_copy_on_write - from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim from pandas.compat import PYPY @@ -894,7 +892,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: @final def __setitem__(self, key, value) -> None: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self.obj) <= 2: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 @@ -2107,7 +2105,6 @@ def _setitem_with_indexer_frame_value( tuple(sub_indexer), value[item], multiindex_indexer, - using_cow=using_copy_on_write(), ) else: val = np.nan @@ -2337,7 +2334,6 @@ def _align_series( indexer, ser: Series, multiindex_indexer: bool = False, - using_cow: bool = False, ): """ Parameters @@ -2407,9 +2403,7 @@ def ravel(i): else: new_ix = Index(new_ix) if ser.index.equals(new_ix): - if using_cow: - return ser - return ser._values.copy() + return ser return ser.reindex(new_ix)._values diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 7e0bdbcb0ddba..c7293553b11dc 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -15,8 +15,6 @@ import numpy as np -from pandas._config import using_copy_on_write - from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level @@ -368,13 +366,6 @@ def concat( 0 1 2 1 3 4 """ - if copy is None: - if using_copy_on_write(): - copy = False - else: - copy = True - elif copy and using_copy_on_write(): - copy = False op = _Concatenator( objs, @@ -385,7 +376,6 @@ def concat( levels=levels, names=names, verify_integrity=verify_integrity, - copy=copy, sort=sort, ) @@ -409,7 +399,6 @@ def __init__( names: list[HashableT] | None = None, ignore_index: bool = False, verify_integrity: bool = False, - copy: bool = True, sort: bool = False, ) -> None: if isinstance(objs, (ABCSeries, ABCDataFrame, str)): @@ -437,7 +426,6 @@ def __init__( self.ignore_index = ignore_index self.verify_integrity = verify_integrity - self.copy = copy objs, keys = self._clean_keys_and_objs(objs, keys) @@ -654,7 +642,7 @@ def get_result(self): cons = sample._constructor_expanddim index, columns = self.new_axes - df = cons(data, index=index, copy=self.copy) + df = cons(data, index=index, copy=False) df.columns = columns return df.__finalize__(self, method="concat") @@ -679,10 +667,8 @@ def get_result(self): mgrs_indexers.append((obj._mgr, indexers)) new_data = concatenate_managers( - mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy + mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=False ) - if not self.copy and not using_copy_on_write(): - new_data._consolidate_inplace() out = sample._constructor_from_mgr(new_data, axes=new_data.axes) return out.__finalize__(self, method="concat") @@ -708,7 +694,6 @@ def _get_comb_axis(self, i: AxisInt) -> Index: axis=data_axis, intersect=self.intersect, sort=self.sort, - copy=self.copy, ) @cache_readonly diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 07920eb1750f2..d9dc5b41a81c2 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -26,8 +26,6 @@ import numpy as np -from pandas._config import using_copy_on_write - from pandas._libs import lib from pandas._libs.parsers import STR_NA_VALUES from pandas.errors import ( @@ -1967,7 +1965,7 @@ def read(self, nrows: int | None = None) -> DataFrame: new_col_dict, columns=columns, index=index, - copy=not using_copy_on_write(), + copy=False, ) self._currow += new_rows diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1e11a9783f0e1..97b9b905dfd62 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -30,7 +30,6 @@ from pandas._config import ( config, get_option, - using_copy_on_write, using_pyarrow_string_dtype, ) @@ -3294,13 +3293,8 @@ def read( dfs.append(df) if len(dfs) > 0: - out = concat(dfs, axis=1, copy=True) - if using_copy_on_write(): - # with CoW, concat ignores the copy keyword. Here, we still want - # to copy to enforce optimized column-major layout - out = out.copy() - out = out.reindex(columns=items, copy=False) - return out + out = concat(dfs, axis=1).copy() + return out.reindex(columns=items, copy=False) return DataFrame(columns=axes[0], index=axes[1]) From 54f0901ed2f503dead4e42756dce64ed5ca406a2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 10 Feb 2024 23:20:31 +0100 Subject: [PATCH 2/2] Fixup failing tests --- pandas/core/indexing.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a267f61ec29cf..6d4817e7259e7 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2105,6 +2105,7 @@ def _setitem_with_indexer_frame_value( tuple(sub_indexer), value[item], multiindex_indexer, + using_cow=True, ) else: val = np.nan @@ -2334,6 +2335,7 @@ def _align_series( indexer, ser: Series, multiindex_indexer: bool = False, + using_cow: bool = False, ): """ Parameters @@ -2403,7 +2405,9 @@ def ravel(i): else: new_ix = Index(new_ix) if ser.index.equals(new_ix): - return ser + if using_cow: + return ser + return ser._values.copy() return ser.reindex(new_ix)._values