diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 69a955dc3cd9f..a61400e20eaa4 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -110,6 +110,7 @@ Deprecations - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`) - Deprecated :meth:`.Groupby.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`) - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) +- Deprecated the behavior of :func:`concat` when ``len(keys) != len(objs)``; in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`) - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) - Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`) - Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`) @@ -129,6 +130,7 @@ Deprecations - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - + .. --------------------------------------------------------------------------- .. 
_whatsnew_210.performance: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 0e920876e09f2..3b6e4e0472e89 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -14,13 +14,16 @@ cast, overload, ) +import warnings import numpy as np from pandas._config import using_copy_on_write from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level +from pandas.core.dtypes.common import is_iterator from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -437,6 +440,19 @@ def __init__( # #1649 clean_keys = [] clean_objs = [] + if is_iterator(keys): + keys = list(keys) + if is_iterator(objs): + objs = list(objs) + if len(keys) != len(objs): + # GH#43485 + warnings.warn( + "The behavior of pd.concat with len(keys) != len(objs) is " + "deprecated. In a future version this will raise instead of " + "truncating to the smaller of the two sequences", + FutureWarning, + stacklevel=find_stack_level(), + ) for k, v in zip(keys, objs): if v is None: continue diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 32d789c118321..244fe6a7927fe 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -785,3 +785,20 @@ def test_concat_ignore_empty_from_reindex(): result = concat([df1, df2.reindex(columns=df1.columns)], ignore_index=True) expected = df1 = DataFrame({"a": [1, 2], "b": [pd.Timestamp("2012-01-01"), pd.NaT]}) tm.assert_frame_equal(result, expected) + + +def test_concat_mismatched_keys_length(): + # GH#43485 + ser = Series(range(5)) + sers = [ser + n for n in range(4)] + keys = ["A", "B", "C"] + + msg = r"The behavior of pd.concat with len\(keys\) != len\(objs\) is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + concat(sers, keys=keys, axis=1) + with tm.assert_produces_warning(FutureWarning, 
match=msg): + concat(sers, keys=keys, axis=0) + with tm.assert_produces_warning(FutureWarning, match=msg): + concat((x for x in sers), keys=(y for y in keys), axis=1) + with tm.assert_produces_warning(FutureWarning, match=msg): + concat((x for x in sers), keys=(y for y in keys), axis=0)