pandas-dev · jreback · Jun 9, 2022 · Dec 1, 2019 · Dec 1, 2019 · Jan 17, 2020
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -259,6 +259,7 @@ Other enhancements
 - :meth:`DataFrame.applymap` can now accept kwargs to pass on to the user-provided ``func`` (:issue:`39987`)
 - Passing a :class:`DataFrame` indexer to ``iloc`` is now disallowed for :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` (:issue:`39004`)
 - :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. ``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`)
+- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`).
 - :meth:`DataFrame.plot.scatter` can now accept a categorical column for the argument ``c`` (:issue:`12380`, :issue:`31357`)
 - :meth:`Series.loc` now raises a helpful error message when the Series has a :class:`MultiIndex` and the indexer has too many dimensions (:issue:`35349`)
 - :func:`read_stata` now supports reading data from compressed files (:issue:`26599`)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
@@ -649,8 +649,18 @@ class PlotAccessor(PandasObject):
         - 'hexbin' : hexbin plot (DataFrame only)
     ax : matplotlib axes object, default None
         An axes of the current figure.
-    subplots : bool, default False
-        Make separate subplots for each column.
+    subplots : bool or list of iterables, default False
+        Whether to group columns into subplots:
+
+        - ``False`` - no subplots will be used
+        - ``True`` - Make separate subplots for each column.
+        - ``sequence of sequences of str`` - create a subplot for each group of columns.
+          For example `[('a', 'c'), ('b', 'd')]` will create 2 subplots: one
+          with columns 'a' and 'c', and one with columns 'b' and 'd'.
+          Remaining columns that aren't specified will be plotted in
+          additional subplots (one per column).
+          .. versionadded:: 1.3.0
+
     sharex : bool, default True if ax is None else False
         In case ``subplots=True``, share x axis and set some x axis labels
         to invisible; defaults to True if ax is None otherwise False if

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
@@ -1,8 +1,13 @@
 from __future__ import annotations
 
+from collections import (
+    Counter,
+    Iterable,
+)
 from typing import (
     TYPE_CHECKING,
     Hashable,
+    Sequence,
 )
 import warnings
 
@@ -103,7 +108,7 @@ def __init__(
         data,
         kind=None,
         by: IndexLabel | None = None,
-        subplots=False,
+        subplots: bool | Sequence[Sequence[str]] = False,
         sharex=None,
         sharey=False,
         use_index=True,
@@ -167,8 +172,7 @@ def __init__(
         self.kind = kind
 
         self.sort_columns = sort_columns
-
-        self.subplots = subplots
+        self.subplots = self._validate_subplots_kwarg(subplots)
 
         if sharex is None:
 
@@ -254,6 +258,98 @@ def __init__(
 
         self._validate_color_args()
 
+    def _validate_subplots_kwarg(
+        self, subplots: bool | Sequence[Sequence[str]]
+    ) -> bool | list[tuple[int, ...]]:
+        """
+        Validate the subplots parameter
+
+        - check type and content
+        - check for duplicate columns
+        - check for invalid column names
+        - convert column names into indices
+        - add missing columns in a group of their own
+        See comments in code below for more details.
+
+        Parameters
+        ----------
+        subplots : subplots parameters as passed to PlotAccessor
+
+        Returns
+        -------
+        validated subplots : a bool or a list of tuples of column indices. Columns
+        in the same tuple will be grouped together in the resulting plot.
+        """
+
+        if isinstance(subplots, bool):
+            return subplots
+        elif not isinstance(subplots, Iterable):
+            raise ValueError("subplots should be a bool or an iterable")
+
+        supported_kinds = (
+            "line",
+            "bar",
+            "barh",
+            "hist",
+            "kde",
+            "density",
+            "area",
+            "pie",
+        )
+        if self._kind not in supported_kinds:
+            raise ValueError(
+                "When subplots is an iterable, kind must be "
+                f"one of {', '.join(supported_kinds)}. Got {self._kind}."
+            )
+
+        if any(
+            not isinstance(group, Iterable) or isinstance(group, str)
+            for group in subplots
+        ):
+            raise ValueError(
+                "When subplots is an iterable, each entry "
+                "should be a list/tuple of column names."
+            )
+
+        cols_in_groups = [col for group in subplots for col in group]
+        duplicates = {col for (col, cnt) in Counter(cols_in_groups).items() if cnt > 1}
+        if duplicates:
+            raise ValueError(
+                f"Each column should be in only one subplot. Columns {duplicates} "
+                "were found in multiple sublots."
+            )
+
+        cols_in_groups_set = set(cols_in_groups)
+        cols_remaining = set(self.data.columns) - cols_in_groups_set
+        bad_columns = cols_in_groups_set - set(self.data.columns)
+
+        if bad_columns:
+            raise ValueError(
+                "Subplots contains the following column(s) "
+                f"which are invalid names: {bad_columns}"
+            )
+
+        # subplots is a list of tuples where each tuple is a group of
+        # columns to be grouped together (one ax per group).
+        # we consolidate the subplots list such that:
+        # - the tuples contain indices instead of column names
+        # - the columns that aren't yet in the list are added in a group
+        #   of their own.
+        # For example with columns from a to g, and
+        # subplots = [(a, c), (b, f, e)],
+        # we end up with [(ai, ci), (bi, fi, ei), (di,), (gi,)]
+        # This way, we can handle self.subplots in a homogeneous manner
+        # later.
+        # TODO: also accept indices instead of just names?
+
+        out = []
+        index = list(self.data.columns).index
+        for group in subplots:
+            out.append(tuple(index(col) for col in group))
+        for col in cols_remaining:
+            out.append((index(col),))
+        return out
+
     def _validate_color_args(self):
         if (
             "color" in self.kwds
@@ -372,8 +468,11 @@ def _maybe_right_yaxis(self, ax: Axes, axes_num):
 
     def _setup_subplots(self):
         if self.subplots:
+            naxes = (
+                self.nseries if isinstance(self.subplots, bool) else len(self.subplots)
+            )
             fig, axes = create_subplots(
-                naxes=self.nseries,
+                naxes=naxes,
                 sharex=self.sharex,
                 sharey=self.sharey,
                 figsize=self.figsize,
@@ -785,9 +884,23 @@ def _get_ax_layer(cls, ax, primary=True):
         else:
             return getattr(ax, "right_ax", ax)
 
+    def _col_idx_to_axis_idx(self, col_idx: int) -> int:
+        """Return the index of the axis where the column at col_idx should be plotted"""
+        if isinstance(self.subplots, list):
+            # Subplots is a list: some columns will be grouped together in the same ax
+            return next(
+                group_idx
+                for (group_idx, group) in enumerate(self.subplots)
+                if col_idx in group
+            )
+        else:
+            # subplots is True: one ax per column
+            return col_idx
+
     def _get_ax(self, i: int):
         # get the twinx ax if appropriate
         if self.subplots:
+            i = self._col_idx_to_axis_idx(i)
             ax = self.axes[i]
             ax = self._maybe_right_yaxis(ax, i)
             self.axes[i] = ax

diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
@@ -2039,6 +2039,72 @@ def test_plot_no_numeric_data(self):
         with pytest.raises(TypeError, match="no numeric data to plot"):
             df.plot()
 
+    @td.skip_if_no_scipy
+    @pytest.mark.parametrize(
+        "kind", ("line", "bar", "barh", "hist", "kde", "density", "area", "pie")
+    )
+    def test_group_subplot(self, kind):
+        d = {
+            "a": np.arange(10),
+            "b": np.arange(10) + 1,
+            "c": np.arange(10) + 1,
+            "d": np.arange(10),
+            "e": np.arange(10),
+        }
+        df = DataFrame(d)
+
+        axes = df.plot(subplots=[("b", "e"), ("c", "d")], kind=kind)
+        assert len(axes) == 3  # 2 groups + single column a
+
+        expected_labels = (["b", "e"], ["c", "d"], ["a"])
+        for ax, labels in zip(axes, expected_labels):
+            if kind != "pie":
+                self._check_legend_labels(ax, labels=labels)
+            if kind == "line":
+                assert len(ax.lines) == len(labels)
+
+    @pytest.mark.parametrize(
+        "subplots, expected_msg",
+        [
+            (123, "subplots should be a bool or an iterable"),
+            ("a", "each entry should be a list/tuple"),  # iterable of non-iterable
+            ((1,), "each entry should be a list/tuple"),  # iterable of non-iterable
+            (("a",), "each entry should be a list/tuple"),  # iterable of strings
+        ],
+    )
+    def test_group_subplot_bad_input(self, subplots, expected_msg):
+        # Make sure error is raised when subplots is not a properly
+        # formatted iterable. Only iterables of iterables are permitted, and
+        # entries should not be strings.
+        d = {"a": np.arange(10), "b": np.arange(10)}
+        df = DataFrame(d)
+
+        with pytest.raises(ValueError, match=expected_msg):
+            df.plot(subplots=subplots)
+
+    def test_group_subplot_invalid_column_name(self):
+        d = {"a": np.arange(10), "b": np.arange(10)}
+        df = DataFrame(d)
+
+        with pytest.raises(ValueError, match="invalid names: {'bad_name'}"):
+            df.plot(subplots=[("a", "bad_name")])
+
+    def test_group_subplot_duplicated_column(self):
+        d = {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)}
+        df = DataFrame(d)
+
+        with pytest.raises(ValueError, match="should be in only one subplot"):
+            df.plot(subplots=[("a", "b"), ("a", "c")])
+
+    @pytest.mark.parametrize("kind", ("box", "scatter", "hexbin"))
+    def test_group_subplot_invalid_kind(self, kind):
+        d = {"a": np.arange(10), "b": np.arange(10)}
+        df = DataFrame(d)
+        with pytest.raises(
+            ValueError, match="When subplots is an iterable, kind must be one of"
+        ):
+            df.plot(subplots=[("a", "b")], kind=kind)
+
     @pytest.mark.parametrize(
         "index_name, old_label, new_label",
         [