Skip to content

DEPR: coercing bools to numeric on concat with numeric dtypes #45101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,7 @@ Other Deprecations
- Deprecated :meth:`DateOffset.apply`, use ``offset + other`` instead (:issue:`44522`)
- Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`)
- A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating that its argument signature may change in future versions to more closely match the arguments of :meth:`.Styler.to_latex` (:issue:`44411`)
- Deprecated behavior of :func:`concat` between objects with bool-dtype and numeric-dtypes; in a future version these will cast to object dtype instead of coercing bools to numeric values (:issue:`39817`)
- Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`)
Expand Down
18 changes: 16 additions & 2 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
TYPE_CHECKING,
cast,
)
import warnings

import numpy as np

from pandas._typing import (
ArrayLike,
DtypeObj,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import (
astype_array,
Expand Down Expand Up @@ -144,8 +146,20 @@ def is_nonempty(x) -> bool:
else:
# coerce to object
to_concat = [x.astype("object") for x in to_concat]

return np.concatenate(to_concat, axis=axis)
kinds = {"o"}

result = np.concatenate(to_concat, axis=axis)
if "b" in kinds and result.dtype.kind in ["i", "u", "f"]:
# GH#39817
warnings.warn(
"Behavior when concatenating bool-dtype and numeric-dtype arrays is "
"deprecated; in a future version these will cast to object dtype "
"(instead of coercing bools to numeric values). To retain the old "
"behavior, explicitly cast bool-dtype arrays to numeric dtype.",
FutureWarning,
stacklevel=find_stack_level(),
)
return result


def union_categoricals(
Expand Down
27 changes: 20 additions & 7 deletions pandas/tests/reshape/concat/test_append_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,16 @@ def test_concatlike_dtypes_coercion(self, item, item2):
# instead of a list; we have separate dedicated tests for categorical
return

warn = None
# specify expected dtype
if typ1 == "bool" and typ2 in ("int64", "float64"):
# series coerces to numeric based on numpy rule
# index doesn't because bool is object dtype
exp_series_dtype = typ2
warn = FutureWarning
elif typ2 == "bool" and typ1 in ("int64", "float64"):
exp_series_dtype = typ1
warn = FutureWarning
elif (
typ1 == "datetime64[ns, US/Eastern]"
or typ2 == "datetime64[ns, US/Eastern]"
Expand Down Expand Up @@ -238,23 +241,33 @@ def test_concatlike_dtypes_coercion(self, item, item2):
# ----- Series ----- #

# series._append
res = Series(vals1)._append(Series(vals2), ignore_index=True)
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
# GH#39817
res = Series(vals1)._append(Series(vals2), ignore_index=True)
exp = Series(exp_data, dtype=exp_series_dtype)
tm.assert_series_equal(res, exp, check_index_type=True)

# concat
res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
# GH#39817
res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
tm.assert_series_equal(res, exp, check_index_type=True)

# 3 elements
res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
# GH#39817
res = Series(vals1)._append(
[Series(vals2), Series(vals3)], ignore_index=True
)
exp = Series(exp_data3, dtype=exp_series_dtype)
tm.assert_series_equal(res, exp)

res = pd.concat(
[Series(vals1), Series(vals2), Series(vals3)],
ignore_index=True,
)
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
# GH#39817
res = pd.concat(
[Series(vals1), Series(vals2), Series(vals3)],
ignore_index=True,
)
tm.assert_series_equal(res, exp)

def test_concatlike_common_coerce_to_pandas_object(self):
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/reshape/concat/test_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,12 @@ def test_concat_empty_series_timelike(self, tz, values):
],
)
def test_concat_empty_series_dtypes(self, left, right, expected):
result = concat([Series(dtype=left), Series(dtype=right)])
warn = None
if (left is np.bool_ or right is np.bool_) and expected is not np.object_:
warn = FutureWarning
with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
# GH#39817
result = concat([Series(dtype=left), Series(dtype=right)])
assert result.dtype == expected

@pytest.mark.parametrize(
Expand Down