PERF/CLN: Preserve concat(keys=range) RangeIndex level in the result #57755

Merged: 7 commits, Mar 8, 2024

2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -204,6 +204,7 @@ Removal of prior version deprecations/changes
- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`)
- Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`)
- Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`)
+- Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
- Iterating over a :class:`.DataFrameGroupBy` or :class:`.SeriesGroupBy` will return tuples of length 1 for the groups when grouping by ``level`` a list of length 1 (:issue:`50064`)
@@ -255,6 +256,7 @@ Removal of prior version deprecations/changes

Performance improvements
~~~~~~~~~~~~~~~~~~~~~~~~
+- :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`)
- :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`)
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
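For context, the two entries added above amount to the following user-facing behavior. This is an illustrative sketch run against pandas with this PR applied; it is not part of the diff:

import pandas as pd

df1 = pd.DataFrame({"a": [1, 2]})
df2 = pd.DataFrame({"a": [3, 4]})

# keys given as a range: the first level of the resulting MultiIndex stays a RangeIndex
result = pd.concat([df1, df2], keys=range(2))
print(type(result.index.levels[0]))  # <class 'pandas.core.indexes.range.RangeIndex'>

# mismatched lengths now raise instead of silently truncating to the shorter sequence
try:
    pd.concat([df1, df2], keys=["x", "y", "z"])
except ValueError as err:
    print(err)
    # The length of the keys (3) must match the length of the
    # objects to concatenate (2)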
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
@@ -1202,7 +1202,7 @@ def _concat_objects(
else:
# GH5610, returns a MI, with the first level being a
# range index
-                keys = list(range(len(values)))
+                keys = RangeIndex(len(values))
result = concat(values, axis=0, keys=keys)

elif not not_indexed_same:
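The groupby change is mechanical: the keys handed to concat() become a RangeIndex instead of a materialized list, which lets the reworked key handling in concat.py (next file) keep that level as a RangeIndex in the resulting MultiIndex. A minimal sketch of the equivalence, with placeholder values:

from pandas import Index, RangeIndex

values = ["res_a", "res_b", "res_c"]  # stand-ins for the per-group results

keys_before = list(range(len(values)))  # old: a plain Python list of ints
keys_after = RangeIndex(len(values))    # new: a lazy RangeIndex

# Same labels either way; only the type of index passed to concat() changes
assert Index(keys_before).equals(keys_after)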
33 changes: 13 additions & 20 deletions pandas/core/reshape/concat.py
@@ -12,12 +12,10 @@
cast,
overload,
)
-import warnings

import numpy as np

from pandas.util._decorators import cache_readonly
-from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_bool,
@@ -493,32 +491,27 @@ def _clean_keys_and_objs(
objs_list = list(com.not_none(*objs_list))
else:
# GH#1649
-        clean_keys = []
+        key_indices = []
clean_objs = []
if is_iterator(keys):
keys = list(keys)
if len(keys) != len(objs_list):
# GH#43485
-            warnings.warn(
-                "The behavior of pd.concat with len(keys) != len(objs) is "
-                "deprecated. In a future version this will raise instead of "
-                "truncating to the smaller of the two sequences",
-                FutureWarning,
-                stacklevel=find_stack_level(),
+            raise ValueError(
+                f"The length of the keys ({len(keys)}) must match "
+                f"the length of the objects to concatenate ({len(objs_list)})"
)
-        for k, v in zip(keys, objs_list):
-            if v is None:
-                continue
-            clean_keys.append(k)
-            clean_objs.append(v)
+        for i, obj in enumerate(objs_list):
+            if obj is not None:
+                key_indices.append(i)
+                clean_objs.append(obj)
objs_list = clean_objs

-        if isinstance(keys, MultiIndex):
-            # TODO: retain levels?
-            keys = type(keys).from_tuples(clean_keys, names=keys.names)
-        else:
-            name = getattr(keys, "name", None)
-            keys = Index(clean_keys, name=name, dtype=getattr(keys, "dtype", None))
+        if not isinstance(keys, Index):
+            keys = Index(keys)

+        if len(key_indices) < len(keys):
+            keys = keys.take(key_indices)

if len(objs_list) == 0:
raise ValueError("All objects passed were None")
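Condensed, the rewritten block does the following. This is a hypothetical standalone version of the logic, simplified from the real helper above; the printed result assumes Index.take keeps a RangeIndex when the taken positions are evenly spaced, which the new test further down relies on:

from pandas import DataFrame, Index, RangeIndex

def clean_keys_and_objs(objs, keys):
    # Simplified sketch of the logic above, not the actual pandas helper.
    if not isinstance(keys, Index):
        keys = list(keys)  # materialize iterators/generators so len() works
    if len(keys) != len(objs):
        raise ValueError(
            f"The length of the keys ({len(keys)}) must match "
            f"the length of the objects to concatenate ({len(objs)})"
        )
    # Drop None objects but remember their positions so the matching keys
    # can be selected with a single take() instead of rebuilding the Index.
    key_indices = [i for i, obj in enumerate(objs) if obj is not None]
    objs = [objs[i] for i in key_indices]
    if not isinstance(keys, Index):
        keys = Index(keys)
    if len(key_indices) < len(keys):
        keys = keys.take(key_indices)
    return keys, objs

keys, objs = clean_keys_and_objs(
    [DataFrame([1]), None, DataFrame([2]), None], RangeIndex(4)
)
print(keys)  # RangeIndex(start=0, stop=4, step=2)

Selecting the surviving keys with take() rather than rebuilding the Index from a filtered list is what preserves a RangeIndex (and, more generally, the type, name, and dtype of any Index passed in).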
22 changes: 12 additions & 10 deletions pandas/tests/groupby/methods/test_describe.py
@@ -90,20 +90,22 @@ def test_frame_describe_multikey(tsframe):

def test_frame_describe_tupleindex():
# GH 14848 - regression from 0.19.0 to 0.19.1
-    df1 = DataFrame(
+    name = "k"
+    df = DataFrame(
{
"x": [1, 2, 3, 4, 5] * 3,
"y": [10, 20, 30, 40, 50] * 3,
"z": [100, 200, 300, 400, 500] * 3,
name: [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5,
}
)
df1["k"] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5
df2 = df1.rename(columns={"k": "key"})
msg = "Names should be list-like for a MultiIndex"
with pytest.raises(ValueError, match=msg):
df1.groupby("k").describe()
with pytest.raises(ValueError, match=msg):
df2.groupby("key").describe()
result = df.groupby(name).describe()
expected = DataFrame(
[[5.0, 3.0, 1.581139, 1.0, 2.0, 3.0, 4.0, 5.0]] * 3,
index=Index([(0, 0, 1), (0, 1, 0), (1, 0, 0)], tupleize_cols=False, name=name),
columns=MultiIndex.from_arrays(
[["x"] * 8, ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]]
),
)
tm.assert_frame_equal(result, expected)


def test_frame_describe_unstacked_format():
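The old assertions expected a ValueError because the previous key handling in concat rebuilt the keys from a filtered list, and a list of tuples gets re-tupleized into a MultiIndex whose scalar name is rejected ("Names should be list-like for a MultiIndex"). Since keys that are already an Index are now left untouched, describing a groupby over tuple-valued keys works and yields a plain object Index of tuples. Roughly, as an illustrative sketch of the post-PR behavior:

import pandas as pd

df = pd.DataFrame({"x": [1, 2, 3], "k": [(0, 1), (0, 1), (1, 0)]})
result = df.groupby("k").describe()
print(result.index)
# Index([(0, 1), (1, 0)], dtype='object', name='k')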
36 changes: 30 additions & 6 deletions pandas/tests/reshape/concat/test_concat.py
@@ -17,6 +17,7 @@
Index,
MultiIndex,
PeriodIndex,
+    RangeIndex,
Series,
concat,
date_range,
@@ -395,6 +396,29 @@ def test_concat_keys_with_none(self):
expected = concat([df0, df0[:2], df0[:1], df0], keys=["b", "c", "d", "e"])
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("klass", [range, RangeIndex])
@pytest.mark.parametrize("include_none", [True, False])
def test_concat_preserves_rangeindex(self, klass, include_none):
df = DataFrame([1, 2])
df2 = DataFrame([3, 4])
data = [df, None, df2, None] if include_none else [df, df2]
keys_length = 4 if include_none else 2
result = concat(data, keys=klass(keys_length))
expected = DataFrame(
[1, 2, 3, 4],
index=MultiIndex(
levels=(
RangeIndex(start=0, stop=keys_length, step=keys_length / 2),
RangeIndex(start=0, stop=2, step=1),
),
codes=(
np.array([0, 0, 1, 1], dtype=np.int8),
np.array([0, 1, 0, 1], dtype=np.int8),
),
),
)
tm.assert_frame_equal(result, expected)

def test_concat_bug_1719(self):
ts1 = Series(
np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
@@ -705,7 +729,7 @@ def test_concat_multiindex_with_empty_rangeindex():
# GH#41234
mi = MultiIndex.from_tuples([("B", 1), ("C", 1)])
df1 = DataFrame([[1, 2]], columns=mi)
-    df2 = DataFrame(index=[1], columns=pd.RangeIndex(0))
+    df2 = DataFrame(index=[1], columns=RangeIndex(0))

result = concat([df1, df2])
expected = DataFrame([[1, 2], [np.nan, np.nan]], columns=mi)
@@ -830,14 +854,14 @@ def test_concat_mismatched_keys_length():
sers = [ser + n for n in range(4)]
keys = ["A", "B", "C"]

msg = r"The behavior of pd.concat with len\(keys\) != len\(objs\) is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
msg = r"The length of the keys"
with pytest.raises(ValueError, match=msg):
concat(sers, keys=keys, axis=1)
-    with tm.assert_produces_warning(FutureWarning, match=msg):
+    with pytest.raises(ValueError, match=msg):
concat(sers, keys=keys, axis=0)
-    with tm.assert_produces_warning(FutureWarning, match=msg):
+    with pytest.raises(ValueError, match=msg):
concat((x for x in sers), keys=(y for y in keys), axis=1)
-    with tm.assert_produces_warning(FutureWarning, match=msg):
+    with pytest.raises(ValueError, match=msg):
concat((x for x in sers), keys=(y for y in keys), axis=0)

