diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b10f201e79318..850391522dbff 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -225,6 +225,7 @@ Copy-on-Write improvements - :meth:`DataFrame.tz_convert` / :meth:`Series.tz_localize` - :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects` - :meth:`DataFrame.astype` / :meth:`Series.astype` + - :meth:`DataFrame.convert_dtypes` / :meth:`Series.convert_dtypes` - :func:`concat` These methods return views when Copy-on-Write is enabled, which provides a significant diff --git a/pandas/core/generic.py b/pandas/core/generic.py index aaf1d0e022fdf..1b96ff57a6efc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6642,7 +6642,7 @@ def convert_dtypes( # https://github.com/python/mypy/issues/8354 return cast(NDFrameT, result) else: - return self.copy() + return self.copy(deep=None) # ---------------------------------------------------------------------- # Filling NA's diff --git a/pandas/core/series.py b/pandas/core/series.py index 80dd0dd19f96f..a88dd224068ac 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5468,7 +5468,7 @@ def _convert_dtypes( if infer_objects: input_series = input_series.infer_objects() if is_object_dtype(input_series): - input_series = input_series.copy() + input_series = input_series.copy(deep=None) if convert_string or convert_integer or convert_boolean or convert_floating: dtype_backend = get_option("mode.dtype_backend") @@ -5483,7 +5483,7 @@ def _convert_dtypes( ) result = input_series.astype(inferred_dtype) else: - result = input_series.copy() + result = input_series.copy(deep=None) return result # error: Cannot determine type of 'isna' diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index a485275a28ac4..73343976e92fb 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -193,3 +193,42 @@ def 
test_astype_arrow_timestamp(using_copy_on_write): if using_copy_on_write: assert not result._mgr._has_no_reference(0) assert np.shares_memory(get_array(df, "a").asi8, get_array(result, "a")._data) + + +def test_convert_dtypes_infer_objects(using_copy_on_write): + ser = Series(["a", "b", "c"]) + ser_orig = ser.copy() + result = ser.convert_dtypes( + convert_integer=False, + convert_boolean=False, + convert_floating=False, + convert_string=False, + ) + + if using_copy_on_write: + assert np.shares_memory(get_array(ser), get_array(result)) + else: + assert not np.shares_memory(get_array(ser), get_array(result)) + + result.iloc[0] = "x" + tm.assert_series_equal(ser, ser_orig) + + +def test_convert_dtypes(using_copy_on_write): + df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]}) + df_orig = df.copy() + df2 = df.convert_dtypes() + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "d"), get_array(df, "d")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df2, "d"), get_array(df, "d")) + + df2.iloc[0, 0] = "x" + tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/util.py b/pandas/tests/copy_view/util.py index b5f4f74fc7e5e..8e53c1644b0a2 100644 --- a/pandas/tests/copy_view/util.py +++ b/pandas/tests/copy_view/util.py @@ -2,7 +2,7 @@ from pandas.core.arrays import BaseMaskedArray -def get_array(obj, col): +def get_array(obj, col=None): """ Helper method to get array for a DataFrame column or a Series. 
@@ -10,8 +10,9 @@ def get_array(obj, col): which triggers tracking references / CoW (and we might be testing that this is done by some other operation). """ - if isinstance(obj, Series) and obj.name == col: + if isinstance(obj, Series) and (col is None or obj.name == col): return obj._values + assert col is not None icol = obj.columns.get_loc(col) assert isinstance(icol, int) arr = obj._get_column_array(icol)