Skip to content

Commit a2acd1b

Browse files
authored
BUG: Fix bug, where BooleanDtype columns are converted to Int64 (#32490)
1 parent 650cf74 commit a2acd1b

File tree

6 files changed

+28
-7
lines changed

6 files changed

+28
-7
lines changed

doc/source/whatsnew/v1.0.2.rst

+2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ Bug fixes
6464
**Datetimelike**
6565

6666
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`)
67+
- Bug in :meth:`Series.astype` not copying for tz-naive and tz-aware datetime64 dtype (:issue:`32490`)
6768
- Bug where :func:`to_datetime` would raise when passed ``pd.NA`` (:issue:`32213`)
6869
- Improved error message when subtracting two :class:`Timestamp` objects that result in an out-of-bounds :class:`Timedelta` (:issue:`31774`)
6970

@@ -85,6 +86,7 @@ Bug fixes
8586
- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`)
8687
- Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`)
8788
- Fixed bug in :meth:`Series.convert_dtypes` for series with a mix of integers and strings (:issue:`32117`)
89+
- Fixed bug in :meth:`DataFrame.convert_dtypes`, where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`)
8890

8991
**Strings**
9092

pandas/core/arrays/datetimes.py

+2
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,8 @@ def astype(self, dtype, copy=True):
587587
if getattr(self.dtype, "tz", None) is None:
588588
return self.tz_localize(new_tz)
589589
result = self.tz_convert(new_tz)
590+
if copy:
591+
result = result.copy()
590592
if new_tz is None:
591593
# Do we want .astype('datetime64[ns]') to be an ndarray.
592594
# The astype in Block._astype expects this to return an

pandas/core/dtypes/cast.py

+4-7
Original file line numberDiff line numberDiff line change
@@ -1049,7 +1049,8 @@ def convert_dtypes(
10491049
dtype
10501050
new dtype
10511051
"""
1052-
if convert_string or convert_integer or convert_boolean:
1052+
is_extension = is_extension_array_dtype(input_array.dtype)
1053+
if (convert_string or convert_integer or convert_boolean) and not is_extension:
10531054
try:
10541055
inferred_dtype = lib.infer_dtype(input_array)
10551056
except ValueError:
@@ -1062,9 +1063,7 @@ def convert_dtypes(
10621063
if convert_integer:
10631064
target_int_dtype = "Int64"
10641065

1065-
if is_integer_dtype(input_array.dtype) and not is_extension_array_dtype(
1066-
input_array.dtype
1067-
):
1066+
if is_integer_dtype(input_array.dtype):
10681067
from pandas.core.arrays.integer import _dtypes
10691068

10701069
inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype)
@@ -1078,9 +1077,7 @@ def convert_dtypes(
10781077
inferred_dtype = input_array.dtype
10791078

10801079
if convert_boolean:
1081-
if is_bool_dtype(input_array.dtype) and not is_extension_array_dtype(
1082-
input_array.dtype
1083-
):
1080+
if is_bool_dtype(input_array.dtype):
10841081
inferred_dtype = "boolean"
10851082
else:
10861083
if isinstance(inferred_dtype, str) and inferred_dtype == "boolean":

pandas/core/internals/blocks.py

+3
Original file line numberDiff line numberDiff line change
@@ -2228,6 +2228,9 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
22282228
# if we are passed a datetime64[ns, tz]
22292229
if is_datetime64tz_dtype(dtype):
22302230
values = self.values
2231+
if copy:
2232+
# this should be the only copy
2233+
values = values.copy()
22312234
if getattr(values, "tz", None) is None:
22322235
values = DatetimeArray(values).tz_localize("UTC")
22332236
values = values.tz_convert(dtype.tz)

pandas/tests/arrays/test_datetimes.py

+12
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,18 @@ def test_astype_to_same(self):
151151
result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
152152
assert result is arr
153153

154+
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"])
155+
@pytest.mark.parametrize(
156+
"other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"]
157+
)
158+
def test_astype_copies(self, dtype, other):
159+
# https://github.com/pandas-dev/pandas/pull/32490
160+
s = pd.Series([1, 2], dtype=dtype)
161+
orig = s.copy()
162+
t = s.astype(other)
163+
t[:] = pd.NaT
164+
tm.assert_series_equal(s, orig)
165+
154166
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
155167
def test_astype_int(self, dtype):
156168
arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")])

pandas/tests/series/methods/test_convert_dtypes.py

+5
Original file line numberDiff line numberDiff line change
@@ -279,3 +279,8 @@ def test_convert_string_dtype(self):
279279
)
280280
result = df.convert_dtypes()
281281
tm.assert_frame_equal(df, result)
282+
283+
def test_convert_bool_dtype(self):
284+
# GH32287
285+
df = pd.DataFrame({"A": pd.array([True])})
286+
tm.assert_frame_equal(df, df.convert_dtypes())

0 commit comments

Comments
 (0)