From 5174711f0feeb724ce450f11325469bf49cd630b Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 27 Mar 2023 16:24:06 -0700 Subject: [PATCH 1/2] DEPR: concat with mismatched len(keys) and len(objs) --- doc/source/whatsnew/v2.1.0.rst | 2 ++ pandas/core/reshape/concat.py | 13 +++++++++++++ pandas/tests/reshape/concat/test_concat.py | 13 +++++++++++++ 3 files changed, 28 insertions(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 69a955dc3cd9f..a61400e20eaa4 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -110,6 +110,7 @@ Deprecations - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`) - Deprecated :meth:`.Groupby.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`) - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) +- Deprecated the behavior of :func:`concat` when ``len(keys) != len(objs)``; in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`) - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) - Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`) - Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`) @@ -129,6 +130,7 @@ Deprecations - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - + .. --------------------------------------------------------------------------- .. 
_whatsnew_210.performance: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 0e920876e09f2..ceba90ba65d7b 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -14,12 +14,14 @@ cast, overload, ) +import warnings import numpy as np from pandas._config import using_copy_on_write from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( @@ -437,6 +439,17 @@ def __init__( # #1649 clean_keys = [] clean_objs = [] + keys = list(keys) + objs = list(objs) + if len(keys) != len(objs): + # GH#43485 + warnings.warn( + "The behavior of pd.concat with len(keys) != len(objs) is " + "deprecated. In a future version this will raise instead of " + "truncating to the smaller of the two sequences", + FutureWarning, + stacklevel=find_stack_level(), + ) for k, v in zip(keys, objs): if v is None: continue diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 32d789c118321..a571b63d30afc 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -785,3 +785,16 @@ def test_concat_ignore_empty_from_reindex(): result = concat([df1, df2.reindex(columns=df1.columns)], ignore_index=True) expected = df1 = DataFrame({"a": [1, 2], "b": [pd.Timestamp("2012-01-01"), pd.NaT]}) tm.assert_frame_equal(result, expected) + + +def test_concat_mismatched_keys_lenght(): + # GH#43485 + ser = Series(range(5)) + sers = [ser + n for n in range(4)] + keys = ["A", "B", "C"] + + msg = r"The behavior of pd.concat with len\(keys\) != len\(objs\) is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + concat(sers, keys=keys, axis=1) + with tm.assert_produces_warning(FutureWarning, match=msg): + concat(sers, keys=keys, axis=0) From ed5801c5193600decf3bf3bddbcc030868223239 Mon Sep 17 00:00:00 2001 From: 
Brock Date: Fri, 31 Mar 2023 16:06:08 -0700 Subject: [PATCH 2/2] Fix+test iterators --- pandas/core/reshape/concat.py | 7 +++++-- pandas/tests/reshape/concat/test_concat.py | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index ceba90ba65d7b..3b6e4e0472e89 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -23,6 +23,7 @@ from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level +from pandas.core.dtypes.common import is_iterator from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -439,8 +440,10 @@ def __init__( # #1649 clean_keys = [] clean_objs = [] - keys = list(keys) - objs = list(objs) + if is_iterator(keys): + keys = list(keys) + if is_iterator(objs): + objs = list(objs) if len(keys) != len(objs): # GH#43485 warnings.warn( diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index a571b63d30afc..244fe6a7927fe 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -787,7 +787,7 @@ def test_concat_ignore_empty_from_reindex(): tm.assert_frame_equal(result, expected) -def test_concat_mismatched_keys_lenght(): +def test_concat_mismatched_keys_length(): # GH#43485 ser = Series(range(5)) sers = [ser + n for n in range(4)] @@ -798,3 +798,7 @@ def test_concat_mismatched_keys_lenght(): concat(sers, keys=keys, axis=1) with tm.assert_produces_warning(FutureWarning, match=msg): concat(sers, keys=keys, axis=0) + with tm.assert_produces_warning(FutureWarning, match=msg): + concat((x for x in sers), keys=(y for y in keys), axis=1) + with tm.assert_produces_warning(FutureWarning, match=msg): + concat((x for x in sers), keys=(y for y in keys), axis=0)