Skip to content

Commit bcc2758

Browse files
Backport PR #32490: BUG: Fix bug, where BooleanDtype columns are converted to Int64 (#32660)
Co-authored-by: Anna Daglis <[email protected]>
1 parent e09abde commit bcc2758

File tree

5 files changed

+26
-8
lines changed

5 files changed

+26
-8
lines changed

pandas/core/arrays/datetimes.py

+2
Original file line number | Diff line number | Diff line change
@@ -589,6 +589,8 @@ def astype(self, dtype, copy=True):
589589
if getattr(self.dtype, "tz", None) is None:
590590
return self.tz_localize(new_tz)
591591
result = self.tz_convert(new_tz)
592+
if copy:
593+
result = result.copy()
592594
if new_tz is None:
593595
# Do we want .astype('datetime64[ns]') to be an ndarray.
594596
# The astype in Block._astype expects this to return an

pandas/core/dtypes/cast.py

+4 -8
Original file line number | Diff line number | Diff line change
@@ -1045,8 +1045,8 @@ def convert_dtypes(
10451045
dtype
10461046
new dtype
10471047
"""
1048-
1049-
if convert_string or convert_integer or convert_boolean:
1048+
is_extension = is_extension_array_dtype(input_array.dtype)
1049+
if (convert_string or convert_integer or convert_boolean) and not is_extension:
10501050
try:
10511051
inferred_dtype = lib.infer_dtype(input_array)
10521052
except ValueError:
@@ -1059,9 +1059,7 @@ def convert_dtypes(
10591059
if convert_integer:
10601060
target_int_dtype = "Int64"
10611061

1062-
if is_integer_dtype(input_array.dtype) and not is_extension_array_dtype(
1063-
input_array.dtype
1064-
):
1062+
if is_integer_dtype(input_array.dtype):
10651063
from pandas.core.arrays.integer import _dtypes
10661064

10671065
inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype)
@@ -1075,9 +1073,7 @@ def convert_dtypes(
10751073
inferred_dtype = input_array.dtype
10761074

10771075
if convert_boolean:
1078-
if is_bool_dtype(input_array.dtype) and not is_extension_array_dtype(
1079-
input_array.dtype
1080-
):
1076+
if is_bool_dtype(input_array.dtype):
10811077
inferred_dtype = "boolean"
10821078
else:
10831079
if isinstance(inferred_dtype, str) and inferred_dtype == "boolean":

pandas/core/internals/blocks.py

+3
Original file line number | Diff line number | Diff line change
@@ -2209,6 +2209,9 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
22092209
# if we are passed a datetime64[ns, tz]
22102210
if is_datetime64tz_dtype(dtype):
22112211
values = self.values
2212+
if copy:
2213+
# this should be the only copy
2214+
values = values.copy()
22122215
if getattr(values, "tz", None) is None:
22132216
values = DatetimeArray(values).tz_localize("UTC")
22142217
values = values.tz_convert(dtype.tz)

pandas/tests/arrays/test_datetimes.py

+12
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,18 @@ def test_astype_to_same(self):
151151
result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
152152
assert result is arr
153153

154+
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"])
155+
@pytest.mark.parametrize(
156+
"other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"]
157+
)
158+
def test_astype_copies(self, dtype, other):
159+
# https://github.com/pandas-dev/pandas/pull/32490
160+
s = pd.Series([1, 2], dtype=dtype)
161+
orig = s.copy()
162+
t = s.astype(other)
163+
t[:] = pd.NaT
164+
tm.assert_series_equal(s, orig)
165+
154166
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
155167
def test_astype_int(self, dtype):
156168
arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")])

pandas/tests/series/test_convert_dtypes.py

+5
Original file line number | Diff line number | Diff line change
@@ -279,3 +279,8 @@ def test_convert_string_dtype(self):
279279
)
280280
result = df.convert_dtypes()
281281
tm.assert_frame_equal(df, result)
282+
283+
def test_convert_bool_dtype(self):
284+
# GH32287
285+
df = pd.DataFrame({"A": pd.array([True])})
286+
tm.assert_frame_equal(df, df.convert_dtypes())

0 commit comments

Comments
 (0)