pandas-dev · ikramersh · Oct 22, 2021 · Oct 22, 2021 · Oct 22, 2021 · Oct 22, 2021
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -8072,7 +8072,7 @@ def resample(
         5     18     100    2018-02-11
         6     17      40    2018-02-18
         7     19      50    2018-02-25
-        >>> df.resample('M', on='week_starting').mean()
+        >>> df.resample('M', on='week_starting')[['price', 'volume']].mean()
                        price  volume
         week_starting
         2018-01-31     10.75    62.5

@@ -1224,7 +1224,7 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
                 else:
                     obj = self._obj_with_exclusions
                 check = obj._get_numeric_data()
-                if len(obj.columns) and not len(check.columns) and not obj.empty:
+                if len(obj.columns) > len(check.columns) and not obj.empty:
                     numeric_only = False
                     # TODO: v1.4+ Add FutureWarning
 

diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
@@ -96,7 +96,7 @@ def test_in_numeric_groupby(self, data_for_grouping):
                 "C": [1, 1, 1, 1, 1, 1, 1, 1],
             }
         )
-        result = df.groupby("A").sum().columns
+        result = df.groupby("A").sum(numeric_only=True).columns
 
         if data_for_grouping.dtype._is_numeric:
             expected = pd.Index(["B", "C"])

diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
@@ -1749,7 +1749,7 @@ def test_stack_multiple_bug(self):
         multi = df.set_index(["DATE", "ID"])
         multi.columns.name = "Params"
         unst = multi.unstack("ID")
-        down = unst.resample("W-THU").mean()
+        down = unst.loc[:, ["VAR1"]].resample("W-THU").mean()
 
         rs = down.stack("ID")
         xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID")

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -333,7 +333,7 @@ def test_observed(observed, using_array_manager):
     gb = df.groupby(["A", "B"], observed=observed)
     exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
     expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
-    result = gb.sum()
+    result = gb.sum(numeric_only=True)
     if not observed:
         expected = cartesian_product_for_groupers(
             expected, [cat1, cat2], list("AB"), fill_value=0

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -125,9 +125,17 @@ def df(self):
     @pytest.mark.parametrize("method", ["mean", "median"])
     def test_averages(self, df, method):
         # mean / median
-        expected_columns_numeric = Index(["int", "float", "category_int"])
-
-        gb = df.groupby("group")
+        expected_column_names = [
+            "int",
+            "float",
+            "category_int",
+            "datetime",
+            "datetimetz",
+            "timedelta",
+        ]
+        expected_columns_numeric = Index(expected_column_names)
+
+        gb = df[["group", *expected_column_names]].groupby("group")
         expected = DataFrame(
             {
                 "category_int": [7.5, 9],
@@ -154,10 +162,7 @@ def test_averages(self, df, method):
             ],
         )
 
-        with tm.assert_produces_warning(
-            FutureWarning, match="Dropping invalid", check_stacklevel=False
-        ):
-            result = getattr(gb, method)(numeric_only=False)
+        result = getattr(gb, method)()
         tm.assert_frame_equal(result.reindex_like(expected), expected)
 
         expected_columns = expected.columns
@@ -205,14 +210,9 @@ def test_first_last(self, df, method):
 
     @pytest.mark.parametrize("method", ["sum", "cumsum"])
     def test_sum_cumsum(self, df, method):
+        expected_columns = Index(["int", "float", "category_int"])
 
-        expected_columns_numeric = Index(["int", "float", "category_int"])
-        expected_columns = Index(
-            ["int", "float", "string", "category_int", "timedelta"]
-        )
-        if method == "cumsum":
-            # cumsum loses string
-            expected_columns = Index(["int", "float", "category_int", "timedelta"])
+        expected_columns_numeric = expected_columns
 
         self._check(df, method, expected_columns, expected_columns_numeric)
 
@@ -231,34 +231,38 @@ def test_cummin_cummax(self, df, method):
             ["int", "float", "category_int", "datetime", "datetimetz", "timedelta"]
         )
 
-        # GH#15561: numeric_only=False set by default like min/max
         expected_columns_numeric = expected_columns
 
         self._check(df, method, expected_columns, expected_columns_numeric)
 
     def _check(self, df, method, expected_columns, expected_columns_numeric):
-        gb = df.groupby("group")
+        """Check the columns ``method`` returns for ``df`` grouped by "group"."""
 
-        # cummin, cummax dont have numeric_only kwarg, always use False
+        # cummin/cummax take no numeric_only kwarg and min/max default it to False,
+        # so a non-numeric column is expected to be dropped with a FutureWarning.
         warn = None
-        if method in ["cummin", "cummax"]:
-            # these dont have numeric_only kwarg, always use False
+        if method in ["cummin", "cummax", "min", "max"]:
+            # expect the "Dropping invalid columns" deprecation warning below
             warn = FutureWarning
-        elif method in ["min", "max"]:
-            # these have numeric_only kwarg, but default to False
-            warn = FutureWarning
+            df["object"] = [
+                None,
+                "y",
+                "z",
+            ]  # add a column that is non-numeric and will be dropped
+            gb = df[["group", "object", *expected_columns_numeric]].groupby("group")
+        else:
+            gb = df[["group", *expected_columns_numeric]].groupby("group")
 
         with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
             result = getattr(gb, method)()
-
         tm.assert_index_equal(result.columns, expected_columns_numeric)
 
         # GH#41475 deprecated silently ignoring nuisance columns
         warn = None
         if len(expected_columns) < len(gb._obj_with_exclusions.columns):
             warn = FutureWarning
         with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
-            result = getattr(gb, method)(numeric_only=False)
+            result = getattr(gb, method)()
 
         tm.assert_index_equal(result.columns, expected_columns)
 

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -896,7 +896,7 @@ def test_keep_nuisance_agg(df, agg_function):
 )
 def test_omit_nuisance_agg(df, agg_function):
     # GH 38774, GH 38815
-    grouped = df.groupby("A")
+    grouped = df.groupby("A")[["C", "D"]]  # list key; tuple keys are deprecated
     result = getattr(grouped, agg_function)()
     expected = getattr(df.loc[:, ["A", "C", "D"]].groupby("A"), agg_function)()
     tm.assert_frame_equal(result, expected)
@@ -1126,8 +1126,8 @@ def test_groupby_with_hier_columns():
 def test_grouping_ndarray(df):
     grouped = df.groupby(df["A"].values)
 
-    result = grouped.sum()
-    expected = df.groupby("A").sum()
+    result = grouped.sum(numeric_only=True)
+    expected = df.groupby("A").sum(numeric_only=True)
     tm.assert_frame_equal(
         result, expected, check_names=False
     )  # Note: no names when grouping by value
@@ -2549,7 +2549,7 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype():
     )
 
     gb = df.groupby(by=["x"])
-    result = gb.sum()
+    result = gb.sum(numeric_only=True)
     tm.assert_frame_equal(result, expected)
 
 

diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
@@ -105,14 +105,14 @@ def test_groupby_with_timegrouper(self):
             )
             expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64")
 
-            result1 = df.resample("5D").sum()
+            result1 = df.resample("5D").sum()["Quantity"].to_frame()
             tm.assert_frame_equal(result1, expected)
 
             df_sorted = df.sort_index()
-            result2 = df_sorted.groupby(Grouper(freq="5D")).sum()
+            result2 = df_sorted.groupby(Grouper(freq="5D")).sum()["Quantity"].to_frame()
             tm.assert_frame_equal(result2, expected)
 
-            result3 = df.groupby(Grouper(freq="5D")).sum()
+            result3 = df.groupby(Grouper(freq="5D")).sum()["Quantity"].to_frame()
             tm.assert_frame_equal(result3, expected)
 
     @pytest.mark.parametrize("should_sort", [True, False])
@@ -185,8 +185,7 @@ def test_timegrouper_with_reg_groups(self):
                     ],
                 }
             ).set_index(["Date", "Buyer"])
-
-            result = df.groupby([Grouper(freq="A"), "Buyer"]).sum()
+            result = df.groupby([Grouper(freq="A"), "Buyer"]).sum(numeric_only=True)
             tm.assert_frame_equal(result, expected)
 
             expected = DataFrame(
@@ -201,7 +200,7 @@ def test_timegrouper_with_reg_groups(self):
                     ],
                 }
             ).set_index(["Date", "Buyer"])
-            result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum()
+            result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum(numeric_only=True)
             tm.assert_frame_equal(result, expected)
 
         df_original = DataFrame(
@@ -239,10 +238,10 @@ def test_timegrouper_with_reg_groups(self):
                 }
             ).set_index(["Date", "Buyer"])
 
-            result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum()
+            result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum(numeric_only=True)
             tm.assert_frame_equal(result, expected)
 
-            result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum()
+            result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum(numeric_only=True)
             expected = DataFrame(
                 {
                     "Buyer": "Carl Joe Mark".split(),
@@ -258,17 +257,23 @@ def test_timegrouper_with_reg_groups(self):
 
             # passing the name
             df = df.reset_index()
-            result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum()
+            result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum(
+                numeric_only=True
+            )
             tm.assert_frame_equal(result, expected)
 
             with pytest.raises(KeyError, match="'The grouper name foo is not found'"):
                 df.groupby([Grouper(freq="1M", key="foo"), "Buyer"]).sum()
 
             # passing the level
             df = df.set_index("Date")
-            result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum()
+            result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum(
+                numeric_only=True
+            )
             tm.assert_frame_equal(result, expected)
-            result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum()
+            result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum(
+                numeric_only=True
+            )
             tm.assert_frame_equal(result, expected)
 
             with pytest.raises(ValueError, match="The level foo is not valid"):
@@ -277,7 +282,9 @@ def test_timegrouper_with_reg_groups(self):
             # multi names
             df = df.copy()
             df["Date"] = df.index + offsets.MonthEnd(2)
-            result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum()
+            result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum(
+                numeric_only=True
+            )
             expected = DataFrame(
                 {
                     "Buyer": "Carl Joe Mark".split(),
@@ -306,18 +313,18 @@ def test_timegrouper_with_reg_groups(self):
                     [datetime(2013, 10, 31, 0, 0)], freq=offsets.MonthEnd(), name="Date"
                 ),
             )
-            result = df.groupby(Grouper(freq="1M")).sum()
+            result = df.groupby(Grouper(freq="1M")).sum(numeric_only=True)
             tm.assert_frame_equal(result, expected)
 
-            result = df.groupby([Grouper(freq="1M")]).sum()
+            result = df.groupby([Grouper(freq="1M")]).sum(numeric_only=True)
             tm.assert_frame_equal(result, expected)
 
             expected.index = expected.index.shift(1)
             assert expected.index.freq == offsets.MonthEnd()
-            result = df.groupby(Grouper(freq="1M", key="Date")).sum()
+            result = df.groupby(Grouper(freq="1M", key="Date")).sum(numeric_only=True)
             tm.assert_frame_equal(result, expected)
 
-            result = df.groupby([Grouper(freq="1M", key="Date")]).sum()
+            result = df.groupby([Grouper(freq="1M", key="Date")]).sum(numeric_only=True)
             tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("freq", ["D", "M", "A", "Q-APR"])

diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
@@ -90,7 +90,7 @@ def test_groupby_resample_on_api():
 
     expected = df.set_index("dates").groupby("key").resample("D").mean()
 
-    result = df.groupby("key").resample("D", on="dates").mean()
+    result = df.groupby("key").resample("D", on="dates").mean()["values"].to_frame()
     tm.assert_frame_equal(result, expected)
 
 
@@ -169,7 +169,7 @@ def tests_skip_nuisance(test_frame):
     tm.assert_frame_equal(result, expected)
 
     expected = r[["A", "B", "C"]].sum()
-    result = r.sum()
+    result = r.sum()[["A", "B", "C"]]
     tm.assert_frame_equal(result, expected)
 
 

diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
@@ -401,7 +401,7 @@ def test_resample_groupby_agg():
     df["date"] = pd.to_datetime(df["date"])
 
     resampled = df.groupby("cat").resample("Y", on="date")
-    expected = resampled.sum()
+    expected = resampled.sum()["num"].to_frame()
     result = resampled.agg({"num": "sum"})
 
     tm.assert_frame_equal(result, expected)

diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
@@ -559,7 +559,7 @@ def test_mixed_type_join_with_suffix(self):
         df.insert(5, "dt", "foo")
 
         grouped = df.groupby("id")
-        mn = grouped.mean()
+        mn = grouped.mean(numeric_only=True)
         cn = grouped.count()
 
         # it works!

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -898,14 +898,18 @@ def _check_output(
 
         # to help with a buglet
         self.data.columns = [k * 2 for k in self.data.columns]
-        table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean)
+        table = self.data[["AA", "DD", "EE", "FF"]].pivot_table(
+            index=["AA"], margins=True, aggfunc=np.mean
+        )
         for value_col in table.columns:
-            totals = table.loc[("All", ""), value_col]
+            totals = table.loc[("All"), value_col]
             assert totals == self.data[value_col].mean()
 
-        table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
+        table = self.data[["AA", "DD", "EE", "FF"]].pivot_table(
+            index=["AA"], margins=True, aggfunc="mean"
+        )
         for item in ["DD", "EE", "FF"]:
-            totals = table.loc[("All", ""), item]
+            totals = table.loc[("All"), item]
             assert totals == self.data[item].mean()
 
     @pytest.mark.parametrize(
@@ -959,7 +963,9 @@ def test_margin_with_only_columns_defined(
             }
         )
 
-        result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
+        result = df[["A", "B", "D", "E"]].pivot_table(
+            columns=columns, margins=True, aggfunc=aggfunc
+        )
         expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns)
 
         tm.assert_frame_equal(result, expected)
@@ -1984,8 +1990,12 @@ def test_pivot_string_as_func(self):
     def test_pivot_string_func_vs_func(self, f, f_numpy):
         # GH #18713
         # for consistency purposes
-        result = pivot_table(self.data, index="A", columns="B", aggfunc=f)
-        expected = pivot_table(self.data, index="A", columns="B", aggfunc=f_numpy)
+        result = pivot_table(
+            self.data[["D", "E", "F"]], index="D", columns="E", aggfunc=f
+        )
+        expected = pivot_table(
+            self.data[["D", "E", "F"]], index="D", columns="E", aggfunc=f_numpy
+        )
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.slow