pandas-dev · ahmedibrhm · Jul 8, 2022 · Jul 8, 2022 · Jul 8, 2022 · Jul 8, 2022
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -293,6 +293,9 @@ def __getitem__(self, item: PositionalIndexer):
             )
         # We are not an array indexer, so maybe e.g. a slice or integer
         # indexer. We dispatch to pyarrow.
+        # Unwrap any NumPy integer scalar to a built-in int before indexing.
+        if isinstance(item, np.integer):
+            item = item.item()
         value = self._data[item]
         if isinstance(value, pa.ChunkedArray):
             return type(self)(value)

@@ -733,6 +733,11 @@ def get_grouper(
     """
     group_axis = obj._get_axis(axis)
 
+    # Flag a ``key`` given as a one-element list holding a string label
+    # (e.g. ``["a"]``); the flag is forwarded to the BaseGrouper built at
+    # the end of this function.
+    tuple_unified = isinstance(key, list) and len(key) == 1 and isinstance(key[0], str)
+
     # validate that the passed single level is compatible with the passed
     # axis of the object
     if level is not None:
@@ -918,7 +923,12 @@ def is_in_obj(gpr) -> bool:
 
     # create the internals grouper
     grouper = ops.BaseGrouper(
-        group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna
+        group_axis,
+        groupings,
+        tuple_unified=tuple_unified,
+        sort=sort,
+        mutated=mutated,
+        dropna=dropna,
     )
     return grouper, frozenset(exclusions), obj
 

@@ -711,6 +711,7 @@ def __init__(
         self,
         axis: Index,
         groupings: Sequence[grouper.Grouping],
+        tuple_unified: bool = False,
         sort: bool = True,
         group_keys: bool = True,
         mutated: bool = False,
@@ -721,6 +722,7 @@ def __init__(
 
         self.axis = axis
         self._groupings: list[grouper.Grouping] = list(groupings)
+        self.tuple_unified = tuple_unified
         self._sort = sort
         self.group_keys = group_keys
         self.mutated = mutated
@@ -779,13 +781,13 @@ def _get_grouper(self):
     @final
     @cache_readonly
     def group_keys_seq(self):
-        if len(self.groupings) == 1:
+        if len(self.groupings) == 1 and not self.tuple_unified:
             return self.levels[0]
-        else:
-            ids, _, ngroups = self.group_info
 
-            # provide "flattened" iterator for multi-group setting
-            return get_flattened_list(ids, ngroups, self.levels, self.codes)
+        ids, _, ngroups = self.group_info
+
+        # provide "flattened" iterator for multi-group setting
+        return get_flattened_list(ids, ngroups, self.levels, self.codes)
 
     @final
     def apply(
@@ -1123,12 +1125,13 @@ def __init__(
         binlabels,
         mutated: bool = False,
         indexer=None,
+        tuple_unified: bool = False,
     ) -> None:
         self.bins = ensure_int64(bins)
         self.binlabels = ensure_index(binlabels)
         self.mutated = mutated
         self.indexer = indexer
-
+        self.tuple_unified = tuple_unified
         # These lengths must match, otherwise we could call agg_series
         #  with empty self.bins, which would raise in libreduction.
         assert len(self.binlabels) == len(self.bins)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -161,6 +161,9 @@ def __internal_pivot_table(
                 pass
         values = list(values)
 
+    if isinstance(keys, list):
+        if len(keys) == 1:
+            keys = keys[0]
     grouped = data.groupby(keys, observed=observed, sort=sort)
     agged = grouped.agg(aggfunc)
     if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
@@ -367,7 +370,11 @@ def _all_key(key):
             margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc)
             cat_axis = 1
 
-            for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed):
+            for keys, piece in table.groupby(level=0, axis=cat_axis, observed=observed):
+                if isinstance(keys, tuple):
+                    (key,) = keys
+                else:
+                    key = keys
                 all_key = _all_key(key)
 
                 # we are going to mutate this, so need to copy!

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
@@ -175,7 +175,8 @@ def __init__(
         # For `hist` plot, need to get grouped original data before `self.data` is
         # updated later
         if self.by is not None and self._kind == "hist":
-            self._grouped = data.groupby(self.by)
+            bymodi = fix_groupby_singlelist_input(self.by)
+            self._grouped = data.groupby(bymodi)
 
         self.kind = kind
 
@@ -1829,3 +1830,10 @@ def blank_labeler(label, value):
             leglabels = labels if labels is not None else idx
             for p, l in zip(patches, leglabels):
                 self._append_legend_handles_labels(p, l)
+
+
+def fix_groupby_singlelist_input(keys):
+    """Unwrap a one-element list holding a string label; else return ``keys``."""
+    if isinstance(keys, list) and len(keys) == 1 and isinstance(keys[0], str):
+        return keys[0]
+    return keys
diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
@@ -108,7 +108,8 @@ def reconstruct_data_with_by(
     1  3.0   4.0   NaN   NaN
     2  NaN   NaN   5.0   6.0
     """
-    grouped = data.groupby(by)
+    bymodi = fix_groupby_singlelist_input(by)
+    grouped = data.groupby(bymodi)
 
     data_list = []
     for key, group in grouped:
@@ -134,3 +135,10 @@ def reformat_hist_y_given_by(
     if by is not None and len(y.shape) > 1:
         return np.array([remove_na_arraylike(col) for col in y.T]).T
     return remove_na_arraylike(y)
+
+
+def fix_groupby_singlelist_input(keys):
+    """Unwrap a one-element list holding a string label; else return ``keys``."""
+    if isinstance(keys, list) and len(keys) == 1 and isinstance(keys[0], str):
+        return keys[0]
+    return keys
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
@@ -61,7 +61,8 @@ def _args_adjust(self):
         # where subplots are created based on by argument
         if is_integer(self.bins):
             if self.by is not None:
-                grouped = self.data.groupby(self.by)[self.columns]
+                bymodi = fix_groupby_singlelist_input(self.by)
+                grouped = self.data.groupby(bymodi)[self.columns]
                 self.bins = [self._calculate_bins(group) for key, group in grouped]
             else:
                 self.bins = self._calculate_bins(self.data)
@@ -265,6 +266,8 @@ def _grouped_plot(
     grouped = data.groupby(by)
     if column is not None:
         grouped = grouped[column]
+        if isinstance(by, list) and len(by) == 1:
+            by = [by]
 
     naxes = len(grouped)
     fig, axes = create_subplots(
@@ -522,3 +525,10 @@ def hist_frame(
     maybe_adjust_figure(fig, wspace=0.3, hspace=0.3)
 
     return axes
+
+
+def fix_groupby_singlelist_input(keys):
+    """Unwrap a one-element list holding a string label; else return ``keys``."""
+    if isinstance(keys, list) and len(keys) == 1 and isinstance(keys[0], str):
+        return keys[0]
+    return keys
diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py
@@ -392,7 +392,7 @@ def test_groupby_selection_other_methods(df):
     df.columns.name = "foo"
     df.index = rng
 
-    g = df.groupby(["A"])[["C"]]
+    g = df.groupby("A")[["C"]]
     g_exp = df[["C"]].groupby(df["A"])
 
     # methods which aren't just .foo()

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -806,7 +806,7 @@ def test_groupby_as_index_cython(df):
     msg = "The default value of numeric_only"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = grouped.mean()
-        expected = data.groupby(["A"]).mean()
+        expected = data.groupby("A").mean()
     expected.insert(0, "A", expected.index)
     expected.index = np.arange(len(expected))
     tm.assert_frame_equal(result, expected)
@@ -1259,7 +1259,7 @@ def test_consistency_name():
         }
     )
 
-    expected = df.groupby(["A"]).B.count()
+    expected = df.groupby("A").B.count()
     result = df.B.groupby(df.A).count()
     tm.assert_series_equal(result, expected)
 
@@ -1495,7 +1495,7 @@ def test_groupby_2d_malformed():
     d["label"] = ["l1", "l2"]
     msg = "The default value of numeric_only"
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        tmp = d.groupby(["group"]).mean()
+        tmp = d.groupby("group").mean()
     res_values = np.array([[0.0, 1.0], [0.0, 1.0]])
     tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"]))
     tm.assert_numpy_array_equal(tmp.values, res_values)
@@ -1888,7 +1888,7 @@ def test_pivot_table_values_key_error():
 
 
 @pytest.mark.parametrize("columns", ["C", ["C"]])
-@pytest.mark.parametrize("keys", [["A"], ["A", "B"]])
+@pytest.mark.parametrize("keys", ["A", ["A", "B"]])
 @pytest.mark.parametrize(
     "values",
     [
@@ -2240,7 +2240,7 @@ def test_groupby_groups_in_BaseGrouper():
     assert result.groups == expected.groups
 
 
-@pytest.mark.parametrize("group_name", ["x", ["x"]])
+@pytest.mark.parametrize("group_name", ["x"])
 def test_groupby_axis_1(group_name):
     # GH 27614
     df = DataFrame(
@@ -2643,7 +2643,7 @@ def test_groupby_aggregation_non_numeric_dtype():
         index=Index(["M", "W"], dtype="object", name="MW"),
     )
 
-    gb = df.groupby(by=["MW"])
+    gb = df.groupby(by="MW")
     result = gb.sum()
     tm.assert_frame_equal(result, expected)
 
@@ -2666,7 +2666,7 @@ def test_groupby_aggregation_multi_non_numeric_dtype():
         index=Index([0, 1], dtype="int64", name="x"),
     )
 
-    gb = df.groupby(by=["x"])
+    gb = df.groupby(by="x")
     result = gb.sum()
     tm.assert_frame_equal(result, expected)
 
@@ -2686,7 +2686,7 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype():
         index=Index([0, 1], dtype="int64", name="x"),
     )
 
-    gb = df.groupby(by=["x"])
+    gb = df.groupby(by="x")
     msg = "The default value of numeric_only"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = gb.sum()
@@ -2766,7 +2766,7 @@ def test_by_column_values_with_same_starting_value():
     )
     aggregate_details = {"Mood": Series.mode, "Credit": "sum"}
 
-    result = df.groupby(["Name"]).agg(aggregate_details)
+    result = df.groupby("Name").agg(aggregate_details)
     expected_result = DataFrame(
         {
             "Mood": [["happy", "sad"], "happy"],
@@ -2795,3 +2795,21 @@ def test_groupby_none_column_name():
     result = df.groupby(by=[None]).sum()
     expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=None))
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_iterator_one_grouper():
+    # A list passed as ``by`` yields tuple keys on iteration, even when it
+    # holds a single label; only a bare label yields scalar keys.
+    df = DataFrame(columns=["a", "b", "c"], index=["x", "y"])
+    df.loc["y"] = Series({"a": 1, "b": 5, "c": 2})
+
+    # two labels -> tuple key
+    key, _ = next(iter(df.groupby(["a", "b"])))
+    assert isinstance(key, tuple)
+
+    # one label wrapped in a list -> still a tuple key
+    key, _ = next(iter(df.groupby(["a"])))
+    assert isinstance(key, tuple)
+
+    key, _ = next(iter(df.groupby("a")))
+    assert isinstance(key, int)
diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py
@@ -164,7 +164,7 @@ def test_nunique_with_timegrouper():
 def test_nunique_with_NaT(key, data, dropna, expected):
     # GH 27951
     df = DataFrame({"key": key, "data": data})
-    result = df.groupby(["key"])["data"].nunique(dropna=dropna)
+    result = df.groupby("key")["data"].nunique(dropna=dropna)
     tm.assert_series_equal(result, expected)
 
 

diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -1194,7 +1194,7 @@ def test_transform_lambda_with_datetimetz():
             "timezone": ["Etc/GMT+4", "US/Eastern"],
         }
     )
-    result = df.groupby(["timezone"])["time"].transform(
+    result = df.groupby("timezone")["time"].transform(
         lambda x: x.dt.tz_localize(x.name)
     )
     expected = Series(

diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py
@@ -269,7 +269,7 @@ class TestBoxWithBy(TestPlotBase):
                 ]
                 * 2,
             ),
-            (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2),
+            ("C", None, ["A", "B"], [["a", "b", "c"]] * 2),
         ],
     )
     def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df):

diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
@@ -407,7 +407,7 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex):
     def test_join_hierarchical_mixed(self):
         # GH 2024
         df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"])
-        new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]})
+        new_df = df.groupby("a").agg({"b": [np.mean, np.sum]})
         other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"])
         other_df.set_index("a", inplace=True)
         # GH 9455, 12219
@@ -718,7 +718,9 @@ def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix
     # some smoke tests
     for c in join_col:
         assert result[c].notna().all()
-
+    if isinstance(join_col, list):
+        if len(join_col) == 1:
+            join_col = join_col[0]
     left_grouped = left.groupby(join_col)
     right_grouped = right.groupby(join_col)
 

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -860,7 +860,7 @@ def test_pivot_with_tuple_of_values(self, method):
                 pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz"))
 
     def _check_output(
-        self, result, values_col, index=["A", "B"], columns=["C"], margins_col="All"
+        self, result, values_col, index=["A", "B"], columns="C", margins_col="All"
     ):
         col_margins = result.loc[result.index[:-1], margins_col]
         expected_col_margins = self.data.groupby(index)[values_col].mean()
-Original file line number
+Diff line change
@@ Expand Up / @@ -269,7 +269,7 @@ class TestBoxWithBy(TestPlotBase): @@
                     ]
                     * 2,
                 ),
-                (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2),
+                ("C", None, ["A", "B"], [["a", "b", "c"]] * 2),
             ],
         )
         def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df):
@@ Expand Down @@