diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 462f243f14494..7a25f7b58c2e7 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -64,6 +64,7 @@ Bug fixes **Datetimelike** - Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`) +- Bug in :meth:`Series.astype` not copying for tz-naive and tz-aware datetime64 dtype (:issue:`32490`) - Bug where :func:`to_datetime` would raise when passed ``pd.NA`` (:issue:`32213`) **Categorical** @@ -84,6 +85,7 @@ Bug fixes - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) - Fixed bug where :meth:`GroupBy.first` and :meth:`GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) +- Fixed bug in :meth:`DataFrame.convert_dtypes`, where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`) **Strings** diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 9f19c7ba0be6e..7223eda22b3d9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -587,6 +587,8 @@ def astype(self, dtype, copy=True): if getattr(self.dtype, "tz", None) is None: return self.tz_localize(new_tz) result = self.tz_convert(new_tz) + if copy: + result = result.copy() if new_tz is None: # Do we want .astype('datetime64[ns]') to be an ndarray. # The astype in Block._astype expects this to return an diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7dac36b53fce5..97c02428cbdf9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1049,7 +1049,8 @@ def convert_dtypes( dtype new dtype """ - if convert_string or convert_integer or convert_boolean: + is_extension = is_extension_array_dtype(input_array.dtype) + if (convert_string or convert_integer or convert_boolean) and not is_extension: try: inferred_dtype = lib.infer_dtype(input_array) except ValueError: @@ -1062,9 +1063,7 @@ def convert_dtypes( if convert_integer: target_int_dtype = "Int64" - if is_integer_dtype(input_array.dtype) and not is_extension_array_dtype( - input_array.dtype - ): + if is_integer_dtype(input_array.dtype): from pandas.core.arrays.integer import _dtypes inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype) @@ -1078,9 +1077,7 @@ def convert_dtypes( inferred_dtype = input_array.dtype if convert_boolean: - if is_bool_dtype(input_array.dtype) and not is_extension_array_dtype( - input_array.dtype - ): + if is_bool_dtype(input_array.dtype): inferred_dtype = "boolean" else: if isinstance(inferred_dtype, str) and inferred_dtype == "boolean": diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c088b7020927b..bce440fe09319 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2228,6 +2228,9 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): # if we are passed a datetime64[ns, tz] if is_datetime64tz_dtype(dtype): values = self.values + if copy: + # this should be the only copy + values = values.copy() if getattr(values, "tz", None) is None: values = DatetimeArray(values).tz_localize("UTC") values = values.tz_convert(dtype.tz) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index a59ed429cc404..2c26e72a245f7 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -151,6 +151,18 @@ def test_astype_to_same(self): result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) assert result is arr + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"]) + @pytest.mark.parametrize( + "other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"] + ) + def test_astype_copies(self, dtype, other): + # https://github.com/pandas-dev/pandas/pull/32490 + s = pd.Series([1, 2], dtype=dtype) + orig = s.copy() + t = s.astype(other) + t[:] = pd.NaT + tm.assert_series_equal(s, orig) + @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 17527a09f07a1..a41f893e3753f 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -279,3 +279,8 @@ def test_convert_string_dtype(self): ) result = df.convert_dtypes() tm.assert_frame_equal(df, result) + + def test_convert_bool_dtype(self): + # GH32287 + df = pd.DataFrame({"A": pd.array([True])}) + tm.assert_frame_equal(df, df.convert_dtypes())