Skip to content

Commit 52c5703

Browse files
authored
DEPR: coercing bools to numeric on concat with numeric dtypes (#45101)
1 parent fd5a8f4 commit 52c5703

File tree

4 files changed

+43
-10
lines changed

4 files changed

+43
-10
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,7 @@ Other Deprecations
606606
- Deprecated :meth:`DateOffset.apply`, use ``offset + other`` instead (:issue:`44522`)
607607
- Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`)
608608
- A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating that the signature of its arguments may change in future versions to more closely emulate the arguments of :meth:`.Styler.to_latex` (:issue:`44411`)
609+
- Deprecated the behavior of :func:`concat` between objects with bool dtype and objects with numeric dtypes; in a future version these will cast to object dtype instead of coercing bools to numeric values (:issue:`39817`)
609610
- Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
610611
- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
611612
- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`)

pandas/core/dtypes/concat.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
TYPE_CHECKING,
66
cast,
77
)
8+
import warnings
89

910
import numpy as np
1011

1112
from pandas._typing import (
1213
ArrayLike,
1314
DtypeObj,
1415
)
16+
from pandas.util._exceptions import find_stack_level
1517

1618
from pandas.core.dtypes.cast import (
1719
astype_array,
@@ -144,8 +146,20 @@ def is_nonempty(x) -> bool:
144146
else:
145147
# coerce to object
146148
to_concat = [x.astype("object") for x in to_concat]
147-
148-
return np.concatenate(to_concat, axis=axis)
149+
kinds = {"o"}
150+
151+
result = np.concatenate(to_concat, axis=axis)
152+
if "b" in kinds and result.dtype.kind in ["i", "u", "f"]:
153+
# GH#39817
154+
warnings.warn(
155+
"Behavior when concatenating bool-dtype and numeric-dtype arrays is "
156+
"deprecated; in a future version these will cast to object dtype "
157+
"(instead of coercing bools to numeric values). To retain the old "
158+
"behavior, explicitly cast bool-dtype arrays to numeric dtype.",
159+
FutureWarning,
160+
stacklevel=find_stack_level(),
161+
)
162+
return result
149163

150164

151165
def union_categoricals(

pandas/tests/reshape/concat/test_append_common.py

+20-7
Original file line numberDiff line numberDiff line change
@@ -204,13 +204,16 @@ def test_concatlike_dtypes_coercion(self, item, item2):
204204
# instead of a list; we have separate dedicated tests for categorical
205205
return
206206

207+
warn = None
207208
# specify expected dtype
208209
if typ1 == "bool" and typ2 in ("int64", "float64"):
209210
# series coerces to numeric based on numpy rule
210211
# index doesn't because bool is object dtype
211212
exp_series_dtype = typ2
213+
warn = FutureWarning
212214
elif typ2 == "bool" and typ1 in ("int64", "float64"):
213215
exp_series_dtype = typ1
216+
warn = FutureWarning
214217
elif (
215218
typ1 == "datetime64[ns, US/Eastern]"
216219
or typ2 == "datetime64[ns, US/Eastern]"
@@ -238,23 +241,33 @@ def test_concatlike_dtypes_coercion(self, item, item2):
238241
# ----- Series ----- #
239242

240243
# series._append
241-
res = Series(vals1)._append(Series(vals2), ignore_index=True)
244+
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
245+
# GH#39817
246+
res = Series(vals1)._append(Series(vals2), ignore_index=True)
242247
exp = Series(exp_data, dtype=exp_series_dtype)
243248
tm.assert_series_equal(res, exp, check_index_type=True)
244249

245250
# concat
246-
res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
251+
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
252+
# GH#39817
253+
res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
247254
tm.assert_series_equal(res, exp, check_index_type=True)
248255

249256
# 3 elements
250-
res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
257+
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
258+
# GH#39817
259+
res = Series(vals1)._append(
260+
[Series(vals2), Series(vals3)], ignore_index=True
261+
)
251262
exp = Series(exp_data3, dtype=exp_series_dtype)
252263
tm.assert_series_equal(res, exp)
253264

254-
res = pd.concat(
255-
[Series(vals1), Series(vals2), Series(vals3)],
256-
ignore_index=True,
257-
)
265+
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
266+
# GH#39817
267+
res = pd.concat(
268+
[Series(vals1), Series(vals2), Series(vals3)],
269+
ignore_index=True,
270+
)
258271
tm.assert_series_equal(res, exp)
259272

260273
def test_concatlike_common_coerce_to_pandas_object(self):

pandas/tests/reshape/concat/test_empty.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,12 @@ def test_concat_empty_series_timelike(self, tz, values):
109109
],
110110
)
111111
def test_concat_empty_series_dtypes(self, left, right, expected):
112-
result = concat([Series(dtype=left), Series(dtype=right)])
112+
warn = None
113+
if (left is np.bool_ or right is np.bool_) and expected is not np.object_:
114+
warn = FutureWarning
115+
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
116+
# GH#39817
117+
result = concat([Series(dtype=left), Series(dtype=right)])
113118
assert result.dtype == expected
114119

115120
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)