From 3901f956fa738e9edc6d4c58a7dc5210df80ec6e Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 15 May 2021 16:39:11 -0700
Subject: [PATCH 1/3] BUG: columns name retention in groupby methods

---
 pandas/core/apply.py                          | 15 +++++++++++--
 pandas/core/groupby/generic.py                | 13 ++++++-----
 pandas/core/reshape/concat.py                 |  9 ++++++--
 .../tests/groupby/aggregate/test_aggregate.py | 17 +++++++++-----
 pandas/tests/groupby/test_groupby.py          | 22 ++++++++++++-------
 5 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index d0c6a1a841edb..da9738a31ad47 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -348,6 +348,7 @@ def agg_list_like(self) -> FrameOrSeriesUnion:
 
         # multiples
         else:
+            indices = []
             for index, col in enumerate(selected_obj):
                 colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
                 try:
@@ -369,7 +370,9 @@ def agg_list_like(self) -> FrameOrSeriesUnion:
                         raise
                 else:
                     results.append(new_res)
-                    keys.append(col)
+                    indices.append(index)
+
+            keys = selected_obj.columns.take(indices)
 
         # if we are empty
         if not len(results):
@@ -399,6 +402,7 @@ def agg_dict_like(self) -> FrameOrSeriesUnion:
         -------
         Result of aggregation.
         """
+        from pandas import Index
         from pandas.core.reshape.concat import concat
 
         obj = self.obj
@@ -435,8 +439,15 @@ def agg_dict_like(self) -> FrameOrSeriesUnion:
             keys_to_use = [k for k in keys if not results[k].empty]
             # Have to check, if at least one DataFrame is not empty.
             keys_to_use = keys_to_use if keys_to_use != [] else keys
+            if selected_obj.ndim == 2:
+                # keys are columns, so we can preserve names
+                keys_to_use = Index(keys_to_use)
+                keys_to_use._set_names(selected_obj.columns.names)
+
             axis = 0 if isinstance(obj, ABCSeries) else 1
-            result = concat({k: results[k] for k in keys_to_use}, axis=axis)
+            result = concat(
+                {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use
+            )
         elif any(is_ndframe):
             # There is a mix of NDFrames and scalars
             raise ValueError(
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 5c28a15532174..3d7f98d8bfc98 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1031,13 +1031,15 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
                     if isinstance(sobj, Series):
                         # GH#35246 test_groupby_as_index_select_column_sum_empty_df
-                        result.columns = [sobj.name]
+                        result.columns = self._obj_with_exclusions.columns.copy()
                     else:
+                        # Retain our column names
+                        result.columns._set_names(
+                            sobj.columns.names, level=list(range(sobj.columns.nlevels))
+                        )
                         # select everything except for the last level, which is the one
                         # containing the name of the function(s), see GH#32040
-                        result.columns = result.columns.rename(
-                            [sobj.columns.name] * result.columns.nlevels
-                        ).droplevel(-1)
+                        result.columns = result.columns.droplevel(-1)
 
         if not self.as_index:
             self._insert_inaxis_grouper_inplace(result)
@@ -1655,7 +1657,7 @@ def _wrap_transformed_output(
             result.columns = self.obj.columns
         else:
             columns = Index(key.label for key in output)
-            columns.name = self.obj.columns.name
+            columns._set_names(self.obj._get_axis(1 - self.axis).names)
             result.columns = columns
 
         result.index = self.obj.index
@@ -1790,7 +1792,6 @@ def nunique(self, dropna: bool = True) -> DataFrame:
         results = self._apply_to_column_groupbys(
             lambda sgb: sgb.nunique(dropna), obj=obj
         )
-        results.columns.names = obj.columns.names  # TODO: do at higher level?
 
         if not self.as_index:
             results.index = Index(range(len(results)))
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index b3b453ea6355a..f9b652cdc48c2 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -358,8 +358,13 @@ def __init__(
                 clean_keys.append(k)
                 clean_objs.append(v)
             objs = clean_objs
-            name = getattr(keys, "name", None)
-            keys = Index(clean_keys, name=name)
+
+            if isinstance(keys, MultiIndex):
+                # TODO: retain levels?
+                keys = type(keys).from_tuples(clean_keys, names=keys.names)
+            else:
+                name = getattr(keys, "name", None)
+                keys = Index(clean_keys, name=name)
 
         if len(objs) == 0:
             raise ValueError("All objects passed were None")
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index b601ba92886d9..95afe4fcf26f4 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -298,13 +298,13 @@ def test_agg_multiple_functions_same_name_with_ohlc_present():
     # ohlc expands dimensions, so different test to the above is required.
     df = DataFrame(
         np.random.randn(1000, 3),
-        index=pd.date_range("1/1/2012", freq="S", periods=1000),
-        columns=["A", "B", "C"],
+        index=pd.date_range("1/1/2012", freq="S", periods=1000, name="dti"),
+        columns=Index(["A", "B", "C"], name="alpha"),
     )
     result = df.resample("3T").agg(
         {"A": ["ohlc", partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]}
     )
-    expected_index = pd.date_range("1/1/2012", freq="3T", periods=6)
+    expected_index = pd.date_range("1/1/2012", freq="3T", periods=6, name="dti")
     expected_columns = MultiIndex.from_tuples(
         [
             ("A", "ohlc", "open"),
@@ -313,7 +313,8 @@ def test_agg_multiple_functions_same_name_with_ohlc_present():
             ("A", "ohlc", "close"),
             ("A", "quantile", "A"),
             ("A", "quantile", "A"),
-        ]
+        ],
+        names=["alpha", None, None],
     )
     non_ohlc_expected_values = np.array(
         [df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]]
@@ -897,7 +898,12 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex():
 def test_multiindex_custom_func(func):
     # GH 31777
     data = [[1, 4, 2], [5, 7, 1]]
-    df = DataFrame(data, columns=MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]]))
+    df = DataFrame(
+        data,
+        columns=MultiIndex.from_arrays(
+            [[1, 1, 2], [3, 4, 3]], names=["Sisko", "Janeway"]
+        ),
+    )
     result = df.groupby(np.array([0, 1])).agg(func)
     expected_dict = {
         (1, 3): {0: 1.0, 1: 5.0},
@@ -905,6 +911,7 @@ def test_multiindex_custom_func(func):
         (2, 3): {0: 2.0, 1: 1.0},
     }
     expected = DataFrame(expected_dict)
+    expected.columns = df.columns
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 83aeb29ec53df..4aaa127949711 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -637,10 +637,11 @@ def test_as_index_select_column():
 
 def test_groupby_as_index_select_column_sum_empty_df():
     # GH 35246
-    df = DataFrame(columns=["A", "B", "C"])
+    df = DataFrame(columns=Index(["A", "B", "C"], name="alpha"))
     left = df.groupby(by="A", as_index=False)["B"].sum()
-    assert type(left) is DataFrame
-    assert left.to_dict() == {"A": {}, "B": {}}
+
+    expected = DataFrame(columns=df.columns[:2], index=range(0))
+    tm.assert_frame_equal(left, expected)
 
 
 def test_groupby_as_index_agg(df):
@@ -1865,8 +1866,8 @@ def test_groupby_agg_ohlc_non_first():
     # GH 21716
     df = DataFrame(
         [[1], [1]],
-        columns=["foo"],
-        index=date_range("2018-01-01", periods=2, freq="D"),
+        columns=Index(["foo"], name="mycols"),
+        index=date_range("2018-01-01", periods=2, freq="D", name="dti"),
     )
 
     expected = DataFrame(
@@ -1878,9 +1879,10 @@ def test_groupby_agg_ohlc_non_first():
                 ("foo", "ohlc", "high"),
                 ("foo", "ohlc", "low"),
                 ("foo", "ohlc", "close"),
-            )
+            ),
+            names=["mycols", None, None],
         ),
-        index=date_range("2018-01-01", periods=2, freq="D"),
+        index=date_range("2018-01-01", periods=2, freq="D", name="dti"),
     )
 
     result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
@@ -2052,7 +2054,11 @@ def test_groupby_duplicate_index():
 
 
 @pytest.mark.parametrize(
-    "idx", [Index(["a", "a"]), MultiIndex.from_tuples((("a", "a"), ("a", "a")))]
+    "idx",
+    [
+        Index(["a", "a"], name="foo"),
+        MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]),
+    ],
 )
 @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning")
 def test_dup_labels_output_shape(groupby_func, idx):

From 23de535615448205249fb905c44fa3fa3bb66e7c Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 16 May 2021 17:14:52 -0700
Subject: [PATCH 2/3] mypy fixup

---
 pandas/core/apply.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index da9738a31ad47..6425d8f75b75f 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -441,8 +441,11 @@ def agg_dict_like(self) -> FrameOrSeriesUnion:
             keys_to_use = keys_to_use if keys_to_use != [] else keys
             if selected_obj.ndim == 2:
                 # keys are columns, so we can preserve names
-                keys_to_use = Index(keys_to_use)
-                keys_to_use._set_names(selected_obj.columns.names)
+                ktu = Index(keys_to_use)
+                ktu._set_names(selected_obj.columns.names)
+                # Incompatible types in assignment (expression has type "Index",
+                # variable has type "List[Hashable]")
+                keys_to_use = ktu  # type: ignore[assignment]
 
             axis = 0 if isinstance(obj, ABCSeries) else 1
             result = concat(

From bab04e138dd466402a98e5e165f2c55150d73603 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 31 May 2021 23:22:57 -0700
Subject: [PATCH 3/3] whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index e06085c4c5c26..9d0538f9001ec 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -233,7 +233,7 @@ Other enhancements
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
 - :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)
-- Improved error message in ``corr` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
+- Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
 
 .. ---------------------------------------------------------------------------
 
@@ -1058,6 +1058,7 @@ Groupby/resample/rolling
 - Bug in :meth:`SeriesGroupBy` aggregations incorrectly returning empty :class:`Series` instead of raising ``TypeError`` on aggregations that are invalid for its dtype, e.g. ``.prod`` with ``datetime64[ns]`` dtype (:issue:`41342`)
 - Bug in :meth:`DataFrame.rolling.__iter__` where ``on`` was not assigned to the index of the resulting objects (:issue:`40373`)
 - Bug in :meth:`DataFrameGroupBy.transform` and :meth:`DataFrameGroupBy.agg` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`41647`)
+- Bug in :class:`DataFrameGroupBy` methods ``agg``, ``transform``, ``sum``, ``bfill``, ``ffill``, ``pad``, ``pct_change``, ``shift``, ``ohlc`` dropping ``.columns.names`` (:issue:`41497`)
 
 Reshaping
 ^^^^^^^^^