diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1c1415255bf89..c11ccfdbe1b33 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -606,6 +606,7 @@ Other Deprecations - Deprecated :meth:`DateOffset.apply`, use ``offset + other`` instead (:issue:`44522`) - Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`) - A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`) +- Deprecated behavior of :func:`concat` between objects with bool-dtype and numeric-dtypes; in a future version these will cast to object dtype instead of coercing bools to numeric values (:issue:`39817`) - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) - Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`) - Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 701f9fd4a9c99..2dc4241c6a303 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -5,6 +5,7 @@ TYPE_CHECKING, cast, ) +import warnings import numpy as np @@ -12,6 +13,7 @@ ArrayLike, DtypeObj, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( astype_array, @@ -144,8 +146,20 @@ def is_nonempty(x) -> bool: else: # coerce to object to_concat = [x.astype("object") for x in to_concat] - - return np.concatenate(to_concat, axis=axis) + kinds = {"o"} + + result = np.concatenate(to_concat, axis=axis) + if "b" in kinds and result.dtype.kind in ["i", "u", "f"]: + # GH#39817 + warnings.warn( + "Behavior when concatenating bool-dtype and numeric-dtype arrays is " + "deprecated; in a future version these will cast to object dtype " + "(instead of coercing bools to numeric values). To retain the old " + "behavior, explicitly cast bool-dtype arrays to numeric dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return result def union_categoricals( diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index bb8027948c540..f36bee9dd8dec 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -204,13 +204,16 @@ def test_concatlike_dtypes_coercion(self, item, item2): # instead of a list; we have separate dedicated tests for categorical return + warn = None # specify expected dtype if typ1 == "bool" and typ2 in ("int64", "float64"): # series coerces to numeric based on numpy rule # index doesn't because bool is object dtype exp_series_dtype = typ2 + warn = FutureWarning elif typ2 == "bool" and typ1 in ("int64", "float64"): exp_series_dtype = typ1 + warn = FutureWarning elif ( typ1 == "datetime64[ns, US/Eastern]" or typ2 == "datetime64[ns, US/Eastern]" @@ -238,23 +241,33 @@ def test_concatlike_dtypes_coercion(self, item, item2): # ----- Series ----- # # series._append - res = Series(vals1)._append(Series(vals2), ignore_index=True) + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + res = Series(vals1)._append(Series(vals2), ignore_index=True) exp = Series(exp_data, dtype=exp_series_dtype) tm.assert_series_equal(res, exp, check_index_type=True) # concat - res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True) + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True) tm.assert_series_equal(res, exp, check_index_type=True) # 3 elements - res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True) + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + res = Series(vals1)._append( + [Series(vals2), Series(vals3)], ignore_index=True + ) exp = Series(exp_data3, dtype=exp_series_dtype) tm.assert_series_equal(res, exp) - res = pd.concat( - [Series(vals1), Series(vals2), Series(vals3)], - ignore_index=True, - ) + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + res = pd.concat( + [Series(vals1), Series(vals2), Series(vals3)], + ignore_index=True, + ) tm.assert_series_equal(res, exp) def test_concatlike_common_coerce_to_pandas_object(self): diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 92f91a6e53add..82d2a8a2b1fd2 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -109,7 +109,12 @@ def test_concat_empty_series_timelike(self, tz, values): ], ) def test_concat_empty_series_dtypes(self, left, right, expected): - result = concat([Series(dtype=left), Series(dtype=right)]) + warn = None + if (left is np.bool_ or right is np.bool_) and expected is not np.object_: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): + # GH#39817 + result = concat([Series(dtype=left), Series(dtype=right)]) assert result.dtype == expected @pytest.mark.parametrize(