From 469eff852bcf19278974962f2f6cd2a4df5c9cd6 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 1 Dec 2019 14:40:19 -0500 Subject: [PATCH 01/37] WIP --- pandas/plotting/_matplotlib/core.py | 42 ++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f2efed30c48e8..bcf5918fa5e96 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,6 +1,7 @@ import re from typing import Optional import warnings +from collections import Iterable import numpy as np @@ -115,6 +116,31 @@ def __init__( self.sort_columns = sort_columns self.subplots = subplots + if isinstance(self.subplots, Iterable): + # subplots is a list of tuples where each tuple is a group of + # columns to be grouped together (one ax per group). + # we consolidate the subplots list such that: + # - the tuples contain indexes instead of column names + # - the columns that aren't yet in the list are added in a group + # of their own. + # For example with columns from a to g, and + # subplots = [(a, c), (b, f, e)], + # we end up with [(ai, ci), (bi, fi, ei), (di,), (gi,)] + # This way, we can handle self.subplots in a homogeneous manner + # later. + # TODO: also accept indexes instead of just names? + # TODO: we're potentially messing with the order of the axes here + cols_in_groups = set(col for group in self.subplots for col in group) + cols_remaining = set(data.columns) - cols_in_groups + + subplots = [] + index = list(data.columns).index + for group in self.subplots: + subplots.append(tuple(index(col) for col in group)) + for col in cols_remaining: + subplots.append((index(col),)) + + self.subplots = subplots if sharex is None: if ax is None: @@ -323,8 +349,11 @@ def _maybe_right_yaxis(self, ax, axes_num): def _setup_subplots(self): if self.subplots: + naxes = ( + self.nseries if isinstance(self.subplots, bool) else len(self.subplots) + ) fig, axes = _subplots( - naxes=self.nseries, + naxes=naxes, sharex=self.sharex, sharey=self.sharey, figsize=self.figsize, @@ -690,9 +719,20 @@ def _get_ax_layer(cls, ax, primary=True): else: return getattr(ax, "right_ax", ax) + def _col_idx_to_axis_idx(self, i): + if isinstance(self.subplots, list): + # Some columns are be grouped together in the same ax + for group_idx, group in enumerate(self.subplots): + if i in group: + return group_idx + else: + # One ax per column + return i + def _get_ax(self, i): # get the twinx ax if appropriate if self.subplots: + i = self._col_idx_to_axis_idx(i) ax = self.axes[i] ax = self._maybe_right_yaxis(ax, i) self.axes[i] = ax From b96a6590c511c8174af6f86ab61923bc8f36e4a2 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 1 Dec 2019 14:46:38 -0500 Subject: [PATCH 02/37] minimal test --- pandas/tests/plotting/test_frame.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index fd66888fc30e4..3062f579b19ce 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -3250,6 +3250,19 @@ def test_plot_no_numeric_data(self): with pytest.raises(TypeError): df.plot() + def test_group_subplot(self): + d = { + 'a': np.arange(10), + 'b': np.arange(10) + 1, + 'c': np.arange(10) + 1, + 'd': np.arange(10), + 'e': np.arange(10), + } + df = pd.DataFrame(d) + + ax = df.plot(subplots=[('b', 'e'), ('c', 'd')]) + assert len(ax) == 3 # 2 groups + single column a + def _generate_4_axes_via_gridspec(): import matplotlib.pyplot as plt From 99cc0498ed934cd2150398b4fa49c05564b7e68f Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 17 Jan 2020 07:50:15 -0500 Subject: [PATCH 03/37] formatted files --- pandas/tests/plotting/test_frame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 3efd004073728..cdc9dd7e53d2b 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -3273,15 +3273,15 @@ def test_plot_no_numeric_data(self): def test_group_subplot(self): d = { - 'a': np.arange(10), - 'b': np.arange(10) + 1, - 'c': np.arange(10) + 1, - 'd': np.arange(10), - 'e': np.arange(10), + "a": np.arange(10), + "b": np.arange(10) + 1, + "c": np.arange(10) + 1, + "d": np.arange(10), + "e": np.arange(10), } df = pd.DataFrame(d) - ax = df.plot(subplots=[('b', 'e'), ('c', 'd')]) + ax = df.plot(subplots=[("b", "e"), ("c", "d")]) assert len(ax) == 3 # 2 groups + single column a def test_missing_markers_legend(self): From 06da9100d0c5e500575622403bc8401bd4e4cb97 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 17 Jan 2020 08:24:21 -0500 Subject: [PATCH 04/37] linting again --- pandas/plotting/_matplotlib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 6502348880166..210ecdd4e3590 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,7 +1,7 @@ +from collections import Iterable import re from typing import Optional import warnings -from collections import Iterable import numpy as np @@ -130,7 +130,7 @@ def __init__( # later. # TODO: also accept indexes instead of just names? # TODO: we're potentially messing with the order of the axes here - cols_in_groups = set(col for group in self.subplots for col in group) + cols_in_groups = {col for group in self.subplots for col in group} cols_remaining = set(data.columns) - cols_in_groups subplots = [] From b370ba5d2ec712eb20eb77cdfa7438c5ca8ceb4e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Mar 2020 10:27:45 -0400 Subject: [PATCH 05/37] Check number of lines and labels --- pandas/tests/plotting/test_frame.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index e2d0a07dfe9a4..420847759892c 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -3296,8 +3296,13 @@ def test_group_subplot(self): } df = pd.DataFrame(d) - ax = df.plot(subplots=[("b", "e"), ("c", "d")]) - assert len(ax) == 3 # 2 groups + single column a + axes = df.plot(subplots=[("b", "e"), ("c", "d")], legend=True) + assert len(axes) == 3 # 2 groups + single column a + + expected_labels = (["b", "e"], ["c", "d"], ["a"]) + for i in range(3): + self._check_legend_labels(axes[i], labels=expected_labels[i]) + assert len(axes[i].lines) == len(expected_labels[i]) def test_missing_markers_legend(self): # 14958 From d1a2ae0265231057e2f287b86d45901ca6f6fb4a Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Mar 2020 11:12:57 -0400 Subject: [PATCH 06/37] Added input checks --- pandas/plotting/_matplotlib/core.py | 20 ++++++++++++++++- pandas/tests/plotting/test_frame.py | 34 ++++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1333025ef1cbe..71e9048361cae 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -116,6 +116,8 @@ def __init__( self.sort_columns = sort_columns self.subplots = subplots + if not isinstance(self.subplots, (bool, Iterable)): + raise ValueError("subplots should be a bool or an iterable") if isinstance(self.subplots, Iterable): # subplots is a list of tuples where each tuple is a group of # columns to be grouped together (one ax per group). @@ -129,8 +131,24 @@ def __init__( # This way, we can handle self.subplots in a homogeneous manner # later. # TODO: also accept indexes instead of just names? - # TODO: we're potentially messing with the order of the axes here + + if any( + not isinstance(group, Iterable) or isinstance(group, str) + for group in subplots + ): + raise ValueError( + "When subplots is an iterable, each entry " + "should be a list/tuple of column names " + "or column indices." + ) cols_in_groups = {col for group in self.subplots for col in group} + bad_columns = {col for col in cols_in_groups if col not in data.columns} + if bad_columns: + raise ValueError( + "Subplots contains the following column(s) " + f"which are invalid names: {bad_columns}" + ) + cols_remaining = set(data.columns) - cols_in_groups subplots = [] diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 420847759892c..3c5e71d75422b 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -408,6 +408,9 @@ def test_subplots(self): for ax in axes: assert ax.get_legend() is None + with pytest.raises(ValueError, match="should be a bool or an iterable"): + axes = df.plot(subplots=123) + def test_groupby_boxplot_sharey(self): # https://github.com/pandas-dev/pandas/issues/20968 # sharey can now be switched check whether the right @@ -3300,9 +3303,34 @@ def test_group_subplot(self): assert len(axes) == 3 # 2 groups + single column a expected_labels = (["b", "e"], ["c", "d"], ["a"]) - for i in range(3): - self._check_legend_labels(axes[i], labels=expected_labels[i]) - assert len(axes[i].lines) == len(expected_labels[i]) + for ax, labels in zip(axes, expected_labels): + self._check_legend_labels(ax, labels=labels) + assert len(ax.lines) == len(labels) + + @pytest.mark.parametrize( + "subplots", + [ + "a", # iterable of non-iterable + (1,), # iterable of non-iterable + ("a",), # iterable of strings + ], + ) + def test_group_subplot_bad_input(self, subplots): + # Make sure error is raised when subplots is not a properly + # formatted iterable. Only iterables of iterables are permitted, and + # entries should not be strings. + d = {"a": np.arange(10), "b": np.arange(10)} + df = pd.DataFrame(d) + + with pytest.raises(ValueError, match="each entry should be a list/tuple"): + df.plot(subplots=subplots) + + def test_group_subplot_invalid_columns(self): + d = {"a": np.arange(10), "b": np.arange(10)} + df = pd.DataFrame(d) + + with pytest.raises(ValueError, match="invalid names: {'bad_name'}"): + df.plot(subplots=[("a", "bad_name")]) def test_missing_markers_legend(self): # 14958 From 10ac3634d480554ba4bfaebf6228e1b7ed934e7c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Mar 2020 11:37:45 -0400 Subject: [PATCH 07/37] some comments --- pandas/plotting/_matplotlib/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 71e9048361cae..0c62284055e6f 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -723,15 +723,16 @@ def _get_ax_layer(cls, ax, primary=True): else: return getattr(ax, "right_ax", ax) - def _col_idx_to_axis_idx(self, i): + def _col_idx_to_axis_idx(self, col_idx): + """Return the index of the axis where the column at col_idx should be plotted""" if isinstance(self.subplots, list): - # Some columns are be grouped together in the same ax + # Subplots is a list: some columns are be grouped together in the same ax for group_idx, group in enumerate(self.subplots): - if i in group: + if col_idx in group: return group_idx else: - # One ax per column - return i + # subplots is True: one ax per column + return col_idx def _get_ax(self, i): # get the twinx ax if appropriate From 9774d42f7f0a8efd98086a25a82090159cf453c6 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Mar 2020 11:41:33 -0400 Subject: [PATCH 08/37] Whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 58ac2b4cba3b7..fa9ff9d6a4cc9 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -87,6 +87,7 @@ Other enhancements - Positional slicing on a :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`) - :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`). - :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`) +- :meth:`DataFrame.plot` will now allow for the ``subplots`` parameter to be a list of tuples so that columns may be grouped together in the same plot. (:issue:`29944`) - .. --------------------------------------------------------------------------- From 572de08ed18f6d814f10cd8942d06857840e906e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Mar 2020 13:36:55 -0400 Subject: [PATCH 09/37] More tests --- pandas/plotting/_matplotlib/core.py | 46 ++++++++++++++++++++--------- pandas/tests/plotting/test_frame.py | 24 +++++++++++---- 2 files changed, 51 insertions(+), 19 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 0c62284055e6f..38ed2213c7ca7 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -119,18 +119,22 @@ def __init__( if not isinstance(self.subplots, (bool, Iterable)): raise ValueError("subplots should be a bool or an iterable") if isinstance(self.subplots, Iterable): - # subplots is a list of tuples where each tuple is a group of - # columns to be grouped together (one ax per group). - # we consolidate the subplots list such that: - # - the tuples contain indexes instead of column names - # - the columns that aren't yet in the list are added in a group - # of their own. - # For example with columns from a to g, and - # subplots = [(a, c), (b, f, e)], - # we end up with [(ai, ci), (bi, fi, ei), (di,), (gi,)] - # This way, we can handle self.subplots in a homogeneous manner - # later. - # TODO: also accept indexes instead of just names? + + supported_kinds = ( + "line", + "bar", + "barh", + "hist", + "kde", + "density", + "area", + "pie", + ) + if self._kind not in supported_kinds: + raise ValueError( + "When subplots is an iterable, kind must be " + f"one of {', '.join(supported_kinds)}. Got {self._kind}" + ) if any( not isinstance(group, Iterable) or isinstance(group, str) @@ -141,15 +145,29 @@ def __init__( "should be a list/tuple of column names " "or column indices." ) + cols_in_groups = {col for group in self.subplots for col in group} - bad_columns = {col for col in cols_in_groups if col not in data.columns} + cols_remaining = set(data.columns) - cols_in_groups + bad_columns = cols_in_groups - set(data.columns) + if bad_columns: raise ValueError( "Subplots contains the following column(s) " f"which are invalid names: {bad_columns}" ) - cols_remaining = set(data.columns) - cols_in_groups + # subplots is a list of tuples where each tuple is a group of + # columns to be grouped together (one ax per group). + # we consolidate the subplots list such that: + # - the tuples contain indexes instead of column names + # - the columns that aren't yet in the list are added in a group + # of their own. + # For example with columns from a to g, and + # subplots = [(a, c), (b, f, e)], + # we end up with [(ai, ci), (bi, fi, ei), (di,), (gi,)] + # This way, we can handle self.subplots in a homogeneous manner + # later. + # TODO: also accept indexes instead of just names? subplots = [] index = list(data.columns).index diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 3c5e71d75422b..3d1366075414f 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -3289,7 +3289,10 @@ def test_plot_no_numeric_data(self): with pytest.raises(TypeError): df.plot() - def test_group_subplot(self): + @pytest.mark.parametrize( + "kind", ("line", "bar", "barh", "hist", "kde", "density", "area", "pie") + ) + def test_group_subplot(self, kind): d = { "a": np.arange(10), "b": np.arange(10) + 1, @@ -3299,13 +3302,15 @@ def test_group_subplot(self): } df = pd.DataFrame(d) - axes = df.plot(subplots=[("b", "e"), ("c", "d")], legend=True) + axes = df.plot(subplots=[("b", "e"), ("c", "d")], kind=kind) assert len(axes) == 3 # 2 groups + single column a expected_labels = (["b", "e"], ["c", "d"], ["a"]) for ax, labels in zip(axes, expected_labels): - self._check_legend_labels(ax, labels=labels) - assert len(ax.lines) == len(labels) + if kind != "pie": + self._check_legend_labels(ax, labels=labels) + if kind == "line": + assert len(ax.lines) == len(labels) @pytest.mark.parametrize( "subplots", @@ -3325,13 +3330,22 @@ def test_group_subplot_bad_input(self, subplots): with pytest.raises(ValueError, match="each entry should be a list/tuple"): df.plot(subplots=subplots) - def test_group_subplot_invalid_columns(self): + def test_group_subplot_invalid_column_name(self): d = {"a": np.arange(10), "b": np.arange(10)} df = pd.DataFrame(d) with pytest.raises(ValueError, match="invalid names: {'bad_name'}"): df.plot(subplots=[("a", "bad_name")]) + @pytest.mark.parametrize("kind", ("box", "scatter", "hexbin")) + def test_group_subplot_invalid_kind(self, kind): + d = {"a": np.arange(10), "b": np.arange(10)} + df = pd.DataFrame(d) + with pytest.raises( + ValueError, match="When subplots is an iterable, kind must be one of" + ): + df.plot(subplots=[("a", "b")], kind=kind) + def test_missing_markers_legend(self): # 14958 df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) From 4519f2244ff098f9f0beaa9fc41552bb33a8993b Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Mar 2020 14:12:35 -0400 Subject: [PATCH 10/37] Skip test if no scipy for kde --- pandas/tests/plotting/test_frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 3d1366075414f..ed10236a1adf5 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -3289,6 +3289,7 @@ def test_plot_no_numeric_data(self): with pytest.raises(TypeError): df.plot() + @td.skip_if_no_scipy @pytest.mark.parametrize( "kind", ("line", "bar", "barh", "hist", "kde", "density", "area", "pie") ) From b804e5754be89b470b0d1420e90d30a23c4b7605 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 14 Apr 2020 10:35:17 -0400 Subject: [PATCH 11/37] Address comments --- pandas/plotting/_core.py | 9 +++++++++ pandas/plotting/_matplotlib/core.py | 27 ++++++++++++--------------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index e466a215091ea..fdcdb612d4b20 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -606,6 +606,15 @@ class PlotAccessor(PandasObject): - 'pie' : pie plot - 'scatter' : scatter plot - 'hexbin' : hexbin plot. + subplots : bool or list of iterables + + * ``False`` - no subplots will be used + * ``True`` - create a subplot for each column + * ``list of iterables`` - create a subplots for each group of columns. + For example `[('a', 'c'), ('b', 'd')]` will create 2 subplots: one + with columns 'a' and 'c', and one with columns 'b' and 'd'. + Remaining columns that aren't specified will be grouped into a + third additional subplot. figsize : a tuple (width, height) in inches use_index : bool, default True diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 24b3ceee9e7d0..da375f8b3c0ce 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -115,11 +115,9 @@ def __init__( self.sort_columns = sort_columns - self.subplots = subplots - if not isinstance(self.subplots, (bool, Iterable)): - raise ValueError("subplots should be a bool or an iterable") - if isinstance(self.subplots, Iterable): - + if isinstance(subplots, bool): + self.subplots = subplots + elif isinstance(subplots, Iterable): supported_kinds = ( "line", "bar", @@ -133,7 +131,7 @@ def __init__( if self._kind not in supported_kinds: raise ValueError( "When subplots is an iterable, kind must be " - f"one of {', '.join(supported_kinds)}. Got {self._kind}" + f"one of {', '.join(supported_kinds)}. Got {self._kind}." ) if any( @@ -142,11 +140,10 @@ def __init__( ): raise ValueError( "When subplots is an iterable, each entry " - "should be a list/tuple of column names " - "or column indices." + "should be a list/tuple of column names." ) - cols_in_groups = {col for group in self.subplots for col in group} + cols_in_groups = {col for group in subplots for col in group} cols_remaining = set(data.columns) - cols_in_groups bad_columns = cols_in_groups - set(data.columns) @@ -169,14 +166,14 @@ def __init__( # later. # TODO: also accept indexes instead of just names? - subplots = [] + self.subplots = [] index = list(data.columns).index - for group in self.subplots: - subplots.append(tuple(index(col) for col in group)) + for group in subplots: + self.subplots.append(tuple(index(col) for col in group)) for col in cols_remaining: - subplots.append((index(col),)) - - self.subplots = subplots + self.subplots.append((index(col),)) + else: + raise ValueError("subplots should be a bool or an iterable") if sharex is None: if ax is None: From d7ee47ca44a018b3db2ae0127cef02bb11c095d8 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 14 Apr 2020 10:37:56 -0400 Subject: [PATCH 12/37] fix docstring --- pandas/plotting/_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index fdcdb612d4b20..8e837e0afd8a2 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -613,8 +613,8 @@ class PlotAccessor(PandasObject): * ``list of iterables`` - create a subplots for each group of columns. For example `[('a', 'c'), ('b', 'd')]` will create 2 subplots: one with columns 'a' and 'c', and one with columns 'b' and 'd'. - Remaining columns that aren't specified will be grouped into a - third additional subplot. + Remaining columns that aren't specified will be plotted in + additional subplots (one per column). figsize : a tuple (width, height) in inches use_index : bool, default True From 8fb298d4ad577cd40345f5d2e0ba136a7ac7a8df Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 14 Apr 2020 10:51:23 -0400 Subject: [PATCH 13/37] Added error when columns are fouund in multiple subplots --- pandas/plotting/_matplotlib/core.py | 14 ++++++++++++-- pandas/tests/plotting/test_frame.py | 7 +++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index da375f8b3c0ce..ba13827ff3c3c 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,4 +1,4 @@ -from collections import Iterable +from collections import Iterable, Counter import re from typing import Optional import warnings @@ -143,7 +143,17 @@ def __init__( "should be a list/tuple of column names." ) - cols_in_groups = {col for group in subplots for col in group} + cols_in_groups = [col for group in subplots for col in group] + duplicates = { + col for (col, cnt) in Counter(cols_in_groups).items() if cnt > 1 + } + if duplicates: + raise ValueError( + f"Each column should be in only one subplot. Columns {duplicates} " + "were found in multiple sublots." + ) + + cols_in_groups = set(cols_in_groups) cols_remaining = set(data.columns) - cols_in_groups bad_columns = cols_in_groups - set(data.columns) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 1f2337c689aa6..2dc61f972f8b0 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -3364,6 +3364,13 @@ def test_group_subplot_invalid_column_name(self): with pytest.raises(ValueError, match="invalid names: {'bad_name'}"): df.plot(subplots=[("a", "bad_name")]) + def test_group_subplot_duplicated_column(self): + d = {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)} + df = pd.DataFrame(d) + + with pytest.raises(ValueError, match="should be in only one subplot"): + df.plot(subplots=[("a", "b"), ("a", "c")]) + @pytest.mark.parametrize("kind", ("box", "scatter", "hexbin")) def test_group_subplot_invalid_kind(self, kind): d = {"a": np.arange(10), "b": np.arange(10)} From ba0f9823e7e20bfea9bd6c4a9b71bcf7f20561b4 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 14 Apr 2020 13:09:54 -0400 Subject: [PATCH 14/37] sorted imports --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index ba13827ff3c3c..a50f5cf77e6e5 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,4 +1,4 @@ -from collections import Iterable, Counter +from collections import Counter, Iterable import re from typing import Optional import warnings From f81d29548536b79c3afae416299440bd1398e9f8 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 21 Jul 2020 08:10:26 -0400 Subject: [PATCH 15/37] Added type annotations --- pandas/plotting/_core.py | 2 +- pandas/plotting/_matplotlib/core.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index f5ab27f617512..d07d1e08d413e 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -640,7 +640,7 @@ class PlotAccessor(PandasObject): - ``False`` - no subplots will be used - ``True`` - Make separate subplots for each column. - - ``list of iterables`` - create a subplots for each group of columns. + - ``list of iterables`` - create a subplot for each group of columns. For example `[('a', 'c'), ('b', 'd')]` will create 2 subplots: one with columns 'a' and 'c', and one with columns 'b' and 'd'. Remaining columns that aren't specified will be plotted in diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 9707d2025b272..76e39eefe1841 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,6 +1,6 @@ from collections import Counter, Iterable import re -from typing import List, Optional +from typing import List, Optional, Sequence, Union import warnings from matplotlib.artist import Artist @@ -85,7 +85,7 @@ def __init__( data, kind=None, by=None, - subplots=False, + subplots: Optional[Union[bool, Sequence[Sequence[Union[str, int]]]]] = False, sharex=None, sharey=False, use_index=True, @@ -122,7 +122,9 @@ def __init__( self.sort_columns = sort_columns if isinstance(subplots, bool): - self.subplots = subplots + self.subplots = ( + subplots + ) # type: Optional[Union[bool, Sequence[Sequence[Union[str, int]]]]] elif isinstance(subplots, Iterable): supported_kinds = ( "line", @@ -159,9 +161,9 @@ def __init__( "were found in multiple sublots." ) - cols_in_groups = set(cols_in_groups) - cols_remaining = set(data.columns) - cols_in_groups - bad_columns = cols_in_groups - set(data.columns) + cols_in_groups_set = set(cols_in_groups) + cols_remaining = set(data.columns) - cols_in_groups_set + bad_columns = cols_in_groups_set - set(data.columns) if bad_columns: raise ValueError( From 8255405920769822975d2fcaf28c5e9860fd1f40 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 21 Jul 2020 08:15:21 -0400 Subject: [PATCH 16/37] some more --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 76e39eefe1841..b081bc7087784 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -767,7 +767,7 @@ def _get_ax_layer(cls, ax, primary=True): else: return getattr(ax, "right_ax", ax) - def _col_idx_to_axis_idx(self, col_idx): + def _col_idx_to_axis_idx(self, col_idx: int): """Return the index of the axis where the column at col_idx should be plotted""" if isinstance(self.subplots, list): # Subplots is a list: some columns are be grouped together in the same ax From f54459ed9ea35503b7ab42185853f974fe91fadd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 21 Jul 2020 08:18:37 -0400 Subject: [PATCH 17/37] typos --- pandas/plotting/_matplotlib/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index b081bc7087784..034d4d475e7c6 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -174,7 +174,7 @@ def __init__( # subplots is a list of tuples where each tuple is a group of # columns to be grouped together (one ax per group). # we consolidate the subplots list such that: - # - the tuples contain indexes instead of column names + # - the tuples contain indices instead of column names # - the columns that aren't yet in the list are added in a group # of their own. # For example with columns from a to g, and @@ -182,7 +182,7 @@ def __init__( # we end up with [(ai, ci), (bi, fi, ei), (di,), (gi,)] # This way, we can handle self.subplots in a homogeneous manner # later. - # TODO: also accept indexes instead of just names? + # TODO: also accept indices instead of just names? self.subplots = [] index = list(data.columns).index @@ -770,7 +770,7 @@ def _get_ax_layer(cls, ax, primary=True): def _col_idx_to_axis_idx(self, col_idx: int): """Return the index of the axis where the column at col_idx should be plotted""" if isinstance(self.subplots, list): - # Subplots is a list: some columns are be grouped together in the same ax + # Subplots is a list: some columns will be grouped together in the same ax for group_idx, group in enumerate(self.subplots): if col_idx in group: return group_idx From f962425c02664b9218710dfe555cc13b66375efa Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 21 Jul 2020 13:04:06 -0400 Subject: [PATCH 18/37] Addressed comments --- pandas/plotting/_core.py | 2 +- pandas/plotting/_matplotlib/core.py | 148 ++++++++++++++-------------- 2 files changed, 76 insertions(+), 74 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index d07d1e08d413e..3a357fef8a73b 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -640,7 +640,7 @@ class PlotAccessor(PandasObject): - ``False`` - no subplots will be used - ``True`` - Make separate subplots for each column. - - ``list of iterables`` - create a subplot for each group of columns. + - ``sequence of sequences of str`` - create a subplot for each group of columns. For example `[('a', 'c'), ('b', 'd')]` will create 2 subplots: one with columns 'a' and 'c', and one with columns 'b' and 'd'. Remaining columns that aren't specified will be plotted in diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 034d4d475e7c6..72cb6d12f15f2 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -85,7 +85,7 @@ def __init__( data, kind=None, by=None, - subplots: Optional[Union[bool, Sequence[Sequence[Union[str, int]]]]] = False, + subplots: Union[bool, Sequence[Sequence[Label]]] = False, sharex=None, sharey=False, use_index=True, @@ -120,78 +120,7 @@ def __init__( self.kind = kind self.sort_columns = sort_columns - - if isinstance(subplots, bool): - self.subplots = ( - subplots - ) # type: Optional[Union[bool, Sequence[Sequence[Union[str, int]]]]] - elif isinstance(subplots, Iterable): - supported_kinds = ( - "line", - "bar", - "barh", - "hist", - "kde", - "density", - "area", - "pie", - ) - if self._kind not in supported_kinds: - raise ValueError( - "When subplots is an iterable, kind must be " - f"one of {', '.join(supported_kinds)}. Got {self._kind}." - ) - - if any( - not isinstance(group, Iterable) or isinstance(group, str) - for group in subplots - ): - raise ValueError( - "When subplots is an iterable, each entry " - "should be a list/tuple of column names." - ) - - cols_in_groups = [col for group in subplots for col in group] - duplicates = { - col for (col, cnt) in Counter(cols_in_groups).items() if cnt > 1 - } - if duplicates: - raise ValueError( - f"Each column should be in only one subplot. Columns {duplicates} " - "were found in multiple sublots." - ) - - cols_in_groups_set = set(cols_in_groups) - cols_remaining = set(data.columns) - cols_in_groups_set - bad_columns = cols_in_groups_set - set(data.columns) - - if bad_columns: - raise ValueError( - "Subplots contains the following column(s) " - f"which are invalid names: {bad_columns}" - ) - - # subplots is a list of tuples where each tuple is a group of - # columns to be grouped together (one ax per group). - # we consolidate the subplots list such that: - # - the tuples contain indices instead of column names - # - the columns that aren't yet in the list are added in a group - # of their own. - # For example with columns from a to g, and - # subplots = [(a, c), (b, f, e)], - # we end up with [(ai, ci), (bi, fi, ei), (di,), (gi,)] - # This way, we can handle self.subplots in a homogeneous manner - # later. - # TODO: also accept indices instead of just names? - - self.subplots = [] - index = list(data.columns).index - for group in subplots: - self.subplots.append(tuple(index(col) for col in group)) - for col in cols_remaining: - self.subplots.append((index(col),)) - else: - raise ValueError("subplots should be a bool or an iterable") + self.subplots = self._validate_subplots_kwarg(subplots) if sharex is None: if ax is None: @@ -271,6 +200,79 @@ def __init__( self._validate_color_args() + def _validate_subplots_kwarg( + self, subplots: Union[bool, Sequence[Sequence[Label]]] + ) -> Union[bool, Sequence[Sequence[Label]]]: + + if isinstance(subplots, bool): + return subplots + elif not isinstance(subplots, Iterable): + raise ValueError("subplots should be a bool or an iterable") + + supported_kinds = ( + "line", + "bar", + "barh", + "hist", + "kde", + "density", + "area", + "pie", + ) + if self._kind not in supported_kinds: + raise ValueError( + "When subplots is an iterable, kind must be " + f"one of {', '.join(supported_kinds)}. Got {self._kind}." + ) + + if any( + not isinstance(group, Iterable) or isinstance(group, str) + for group in subplots + ): + raise ValueError( + "When subplots is an iterable, each entry " + "should be a list/tuple of column names." + ) + + cols_in_groups = [col for group in subplots for col in group] + duplicates = {col for (col, cnt) in Counter(cols_in_groups).items() if cnt > 1} + if duplicates: + raise ValueError( + f"Each column should be in only one subplot. Columns {duplicates} " + "were found in multiple sublots." + ) + + cols_in_groups_set = set(cols_in_groups) + cols_remaining = set(self.data.columns) - cols_in_groups_set + bad_columns = cols_in_groups_set - set(self.data.columns) + + if bad_columns: + raise ValueError( + "Subplots contains the following column(s) " + f"which are invalid names: {bad_columns}" + ) + + # subplots is a list of tuples where each tuple is a group of + # columns to be grouped together (one ax per group). + # we consolidate the subplots list such that: + # - the tuples contain indices instead of column names + # - the columns that aren't yet in the list are added in a group + # of their own. + # For example with columns from a to g, and + # subplots = [(a, c), (b, f, e)], + # we end up with [(ai, ci), (bi, fi, ei), (di,), (gi,)] + # This way, we can handle self.subplots in a homogeneous manner + # later. + # TODO: also accept indices instead of just names? + + out = [] + index = list(self.data.columns).index + for group in subplots: + out.append(tuple(index(col) for col in group)) + for col in cols_remaining: + out.append((index(col),)) + return out + def _validate_color_args(self): import matplotlib.colors From dda80b322046b93a1b042671a3eef751613cc48e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 21 Jul 2020 13:05:24 -0400 Subject: [PATCH 19/37] Specify stricter output type --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 72cb6d12f15f2..996222ba67eb6 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -202,7 +202,7 @@ def __init__( def _validate_subplots_kwarg( self, subplots: Union[bool, Sequence[Sequence[Label]]] - ) -> Union[bool, Sequence[Sequence[Label]]]: + ) -> Union[bool, Sequence[Sequence[int]]]: if isinstance(subplots, bool): return subplots From a457b385daa768eca358aa37a321f796bd5f580b Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 21 Jul 2020 13:06:08 -0400 Subject: [PATCH 20/37] Again --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 996222ba67eb6..06511fc5353da 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -201,7 +201,7 @@ def __init__( self._validate_color_args() def _validate_subplots_kwarg( - self, subplots: Union[bool, Sequence[Sequence[Label]]] + self, subplots: Union[bool, Sequence[Sequence[str]]] ) -> Union[bool, Sequence[Sequence[int]]]: if isinstance(subplots, bool): From 8d85ba9e2e54b556e5a51c2d3c3592bc90dfebcd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 21 Jul 2020 13:32:56 -0400 Subject: [PATCH 21/37] hopefully fix mypy? --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 06511fc5353da..1ff3ad2c501e6 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -85,7 +85,7 @@ def __init__( data, kind=None, by=None, - subplots: Union[bool, Sequence[Sequence[Label]]] = False, + subplots: Union[bool, Sequence[Sequence[str]]] = False, sharex=None, sharey=False, use_index=True, From fdd761064080889c8cdee254b37afbc13e2b053c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 1 Sep 2020 12:51:44 -0400 Subject: [PATCH 22/37] added test for coverage --- pandas/tests/plotting/test_frame.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 1668523af94b8..8cdc2030ec86b 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -3354,21 +3354,22 @@ def test_group_subplot(self, kind): assert len(ax.lines) == len(labels) @pytest.mark.parametrize( - "subplots", + "subplots, expected_msg", [ - "a", # iterable of non-iterable - (1,), # iterable of non-iterable - ("a",), # iterable of strings + (123, "subplots should be a bool or an iterable"), + ("a", "each entry should be a list/tuple"), # iterable of non-iterable + ((1,), "each entry should be a list/tuple"), # iterable of non-iterable + (("a",), "each entry should be a list/tuple"), # iterable of strings ], ) - def test_group_subplot_bad_input(self, subplots): + def test_group_subplot_bad_input(self, subplots, expected_msg): # Make sure error is raised when subplots is not a properly # formatted iterable. Only iterables of iterables are permitted, and # entries should not be strings. d = {"a": np.arange(10), "b": np.arange(10)} df = pd.DataFrame(d) - with pytest.raises(ValueError, match="each entry should be a list/tuple"): + with pytest.raises(ValueError, match=expected_msg): df.plot(subplots=subplots) def test_group_subplot_invalid_column_name(self): From 44785974bb3bcc948b1501756ed9d7db41813bb4 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 5 Sep 2020 09:41:36 -0400 Subject: [PATCH 23/37] changed to whatsnew 112 --- doc/source/whatsnew/v1.1.0.rst | 3 --- doc/source/whatsnew/v1.1.2.rst | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d7dead21f132f..a49b29d691692 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -341,9 +341,6 @@ Other enhancements - :meth:`DataFrame.agg` and :meth:`Series.agg` now accept named aggregation for renaming the output columns/indexes. (:issue:`26513`) - ``compute.use_numba`` now exists as a configuration option that utilizes the numba engine when available (:issue:`33966`, :issue:`35374`) - :meth:`Series.plot` now supports asymmetric error bars. Previously, if :meth:`Series.plot` received a "2xN" array with error values for `yerr` and/or `xerr`, the left/lower values (first row) were mirrored, while the right/upper values (second row) were ignored. Now, the first row represents the left/lower error values and the second row the right/upper error values. (:issue:`9536`) -- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be - a list of iterables specifying column groups, so that columns may be - grouped together in the same subplot (:issue:`29688`). .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 39850905f60fa..ac65995143bf8 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -43,6 +43,7 @@ Bug fixes Other ~~~~~ - :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize` (:issue:`35667`) +- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`). .. --------------------------------------------------------------------------- From ac512021457f9e258eb235d347097a7fe9cc4549 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 5 Sep 2020 16:53:11 -0400 Subject: [PATCH 24/37] used proper whatsnew file --- doc/source/whatsnew/v1.1.2.rst | 1 - doc/source/whatsnew/v1.2.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index ac65995143bf8..39850905f60fa 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -43,7 +43,6 @@ Bug fixes Other ~~~~~ - :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize` (:issue:`35667`) -- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`). .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e65daa439a225..1de604c7aa0e7 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -103,6 +103,7 @@ Other enhancements - Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) +- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`). - - From ea6c73dd8356bd87a2dea24a2af88b9020e27dc7 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 6 Nov 2020 08:07:34 +0000 Subject: [PATCH 25/37] unwgnjgrwnjwgnjrkknrjg --- pandas/tests/plotting/test_frame.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index d6717887febce..816c75f39a88e 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -3400,7 +3400,7 @@ def test_group_subplot(self, kind): "d": np.arange(10), "e": np.arange(10), } - df = pd.DataFrame(d) + df = DataFrame(d) axes = df.plot(subplots=[("b", "e"), ("c", "d")], kind=kind) assert len(axes) == 3 # 2 groups + single column a @@ -3426,21 +3426,21 @@ def test_group_subplot_bad_input(self, subplots, expected_msg): # formatted iterable. Only iterables of iterables are permitted, and # entries should not be strings. d = {"a": np.arange(10), "b": np.arange(10)} - df = pd.DataFrame(d) + df = DataFrame(d) with pytest.raises(ValueError, match=expected_msg): df.plot(subplots=subplots) def test_group_subplot_invalid_column_name(self): d = {"a": np.arange(10), "b": np.arange(10)} - df = pd.DataFrame(d) + df = DataFrame(d) with pytest.raises(ValueError, match="invalid names: {'bad_name'}"): df.plot(subplots=[("a", "bad_name")]) def test_group_subplot_duplicated_column(self): d = {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)} - df = pd.DataFrame(d) + df = DataFrame(d) with pytest.raises(ValueError, match="should be in only one subplot"): df.plot(subplots=[("a", "b"), ("a", "c")]) @@ -3448,7 +3448,7 @@ def test_group_subplot_duplicated_column(self): @pytest.mark.parametrize("kind", ("box", "scatter", "hexbin")) def test_group_subplot_invalid_kind(self, kind): d = {"a": np.arange(10), "b": np.arange(10)} - df = pd.DataFrame(d) + df = DataFrame(d) with pytest.raises( ValueError, match="When subplots is an iterable, kind must be one of" ): From 34515a84f11264a74c22a61e707af05d9ee8862c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 17 Jan 2021 10:14:45 +0000 Subject: [PATCH 26/37] Addressed comments --- pandas/plotting/_core.py | 2 ++ pandas/plotting/_matplotlib/core.py | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 98dc6fbb632d2..6eb8a867551bd 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -645,6 +645,8 @@ class PlotAccessor(PandasObject): with columns 'a' and 'c', and one with columns 'b' and 'd'. Remaining columns that aren't specified will be plotted in additional subplots (one per column). + .. versionadded:: 1.3.0 + sharex : bool, default True if ax is None else False In case ``subplots=True``, share x axis and set some x axis labels to invisible; defaults to True if ax is None otherwise False if diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index c8b3e77473662..a190a0d76fe37 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -213,7 +213,26 @@ def __init__( def _validate_subplots_kwarg( self, subplots: Union[bool, Sequence[Sequence[str]]] - ) -> Union[bool, Sequence[Sequence[int]]]: + ) -> Union[bool, List[Tuple[int]]]: + """ + Validate the subplots parameter + + - check type and content + - check for duplicate columns + - check for invalid column names + - convert column names into indices + - add missing columns in a group of their own + See comments in code below for more details. + + Parameters + ---------- + subplots : subplots parameters as passed to PlotAccessor + + Returns + ------- + validated subplots : a bool or a list of tuples of column indices. Columns + in the same tuple will be grouped together in the resulting plot. + """ if isinstance(subplots, bool): return subplots From ae90509e83ca8b13e9c8b297daf9d5d3af74cbff Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 17 Jan 2021 10:24:04 +0000 Subject: [PATCH 27/37] remove code due to merge mess up --- pandas/tests/plotting/frame/test_frame.py | 409 ---------------------- 1 file changed, 409 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index c94fe22520c9e..1ff4d38592b04 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -352,415 +352,6 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) - @pytest.mark.slow - def test_subplots(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - for kind in ["bar", "barh", "line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - assert axes.shape == (3,) - - for ax, column in zip(axes, df.columns): - self._check_legend_labels(ax, labels=[pprint_thing(column)]) - - for ax in axes[:-2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - if not (kind == "bar" and self.mpl_ge_3_1_0): - # change https://github.com/pandas-dev/pandas/issues/26714 - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, sharex=False) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, legend=False) - for ax in axes: - assert ax.get_legend() is None - - with pytest.raises(ValueError, match="should be a bool or an iterable"): - axes = df.plot(subplots=123) - - def test_groupby_boxplot_sharey(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharey can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # set sharey=True should be identical - axes = df.groupby("c").boxplot(sharey=True) - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # sharey=False, all yticklabels should be visible - axes = df.groupby("c").boxplot(sharey=False) - expected = [True, True, True, True] - self._assert_ytickslabels_visibility(axes, expected) - - def test_groupby_boxplot_sharex(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharex can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # set sharex=False should be identical - axes = df.groupby("c").boxplot(sharex=False) - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # sharex=True, yticklabels should be visible - # only for bottom plots - axes = df.groupby("c").boxplot(sharex=True) - expected = [False, False, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - @pytest.mark.slow - def test_subplots_timeseries(self): - idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) - - for kind in ["line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - - for ax in axes[:-2]: - # GH 7801 - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - self._check_ticks_props(axes, xrot=0) - - axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - - def test_subplots_timeseries_y_axis(self): - # GH16953 - data = { - "numeric": np.array([1, 2, 5]), - "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), - ], - "datetime_no_tz": [ - pd.to_datetime("2017-08-01 00:00:00"), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - "datetime_all_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00", utc=True), - pd.to_datetime("2017-08-02 00:00:00", utc=True), - ], - "text": ["This", "should", "fail"], - } - testdata = DataFrame(data) - - ax_numeric = testdata.plot(y="numeric") - assert ( - ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values - ).all() - ax_timedelta = testdata.plot(y="timedelta") - assert ( - ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values - ).all() - ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") - assert ( - ax_datetime_no_tz.get_lines()[0].get_data()[1] - == testdata["datetime_no_tz"].values - ).all() - ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") - assert ( - ax_datetime_all_tz.get_lines()[0].get_data()[1] - == testdata["datetime_all_tz"].values - ).all() - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - testdata.plot(y="text") - - @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") - def test_subplots_timeseries_y_axis_not_supported(self): - """ - This test will fail for: - period: - since period isn't yet implemented in ``select_dtypes`` - and because it will need a custom value converter + - tick formatter (as was done for x-axis plots) - - categorical: - because it will need a custom value converter + - tick formatter (also doesn't work for x-axis, as of now) - - datetime_mixed_tz: - because of the way how pandas handles ``Series`` of - ``datetime`` objects with different timezone, - generally converting ``datetime`` objects in a tz-aware - form could help with this problem - """ - data = { - "numeric": np.array([1, 2, 5]), - "period": [ - pd.Period("2017-08-01 00:00:00", freq="H"), - pd.Period("2017-08-01 02:00", freq="H"), - pd.Period("2017-08-02 00:00:00", freq="H"), - ], - "categorical": pd.Categorical( - ["c", "b", "a"], categories=["a", "b", "c"], ordered=False - ), - "datetime_mixed_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - } - testdata = DataFrame(data) - ax_period = testdata.plot(x="numeric", y="period") - assert ( - ax_period.get_lines()[0].get_data()[1] == testdata["period"].values - ).all() - ax_categorical = testdata.plot(x="numeric", y="categorical") - assert ( - ax_categorical.get_lines()[0].get_data()[1] - == testdata["categorical"].values - ).all() - ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") - assert ( - ax_datetime_mixed_tz.get_lines()[0].get_data()[1] - == testdata["datetime_mixed_tz"].values - ).all() - - @pytest.mark.slow - def test_subplots_layout(self): - # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, layout=(2, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(-1, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(2, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(-1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(4, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - assert axes.shape == (4, 1) - - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(1, 1)) - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(-1, -1)) - - # single column - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - axes = df.plot(subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - axes = df.plot(subplots=True, layout=(3, 3)) - self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - assert axes.shape == (3, 3) - - @pytest.mark.slow - def test_subplots_warnings(self): - # GH 9464 - with tm.assert_produces_warning(None): - df = DataFrame(np.random.randn(100, 4)) - df.plot(subplots=True, layout=(3, 2)) - - df = DataFrame( - np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) - ) - df.plot(subplots=True, layout=(3, 2)) - - @pytest.mark.slow - def test_subplots_multiple_axes(self): - # GH 5353, 6970, GH 7069 - fig, axes = self.plt.subplots(2, 3) - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - # draw on second row - returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) - tm.close() - - with pytest.raises(ValueError): - fig, axes = self.plt.subplots(2, 3) - # pass different number of axes from required - df.plot(subplots=True, ax=axes) - - # pass 2-dim axes and invalid layout - # invalid lauout should not affect to input and return value - # (show warning is tested in - # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes - fig, axes = self.plt.subplots(2, 2) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", UserWarning) - df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - # single column - fig, axes = self.plt.subplots(1, 1) - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - def test_subplots_ts_share_axes(self): - # GH 3964 - fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) - self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) - df = DataFrame( - np.random.randn(10, 9), - index=date_range(start="2014-07-01", freq="M", periods=10), - ) - for i, ax in enumerate(axes.ravel()): - df[i].plot(ax=ax, fontsize=5) - - # Rows other than bottom should not be visible - for ax in axes[0:-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=False) - - # Bottom row should be visible - for ax in axes[-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=True) - - # First column should be visible - for ax in axes[[0, 1, 2], [0]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - # Other columns should not be visible - for ax in axes[[0, 1, 2], [1]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - for ax in axes[[0, 1, 2], [2]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - - def test_subplots_sharex_axes_existing_axes(self): - # GH 9158 - d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} - df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) - - axes = df[["A", "B"]].plot(subplots=True) - df["C"].plot(ax=axes[0], secondary_y=True) - - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - for ax in axes.ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - @pytest.mark.slow - def test_subplots_dup_columns(self): - # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) - axes = df.plot(subplots=True) - for ax in axes: - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - axes = df.plot(subplots=True, secondary_y="a") - for ax in axes: - # (right) is only attached when subplots=False - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - ax = df.plot(secondary_y="a") - self._check_legend_labels(ax, labels=["a (right)"] * 5) - assert len(ax.lines) == 0 - assert len(ax.right_ax.lines) == 5 - def test_negative_log(self): df = -DataFrame( np.random.rand(6, 4), From 827c5b5899dab9721dee1a34b2c9be04741c9c6a Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 17 Jan 2021 15:37:59 +0000 Subject: [PATCH 28/37] fix mypy --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a190a0d76fe37..4c0a32f4098cf 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -213,7 +213,7 @@ def __init__( def _validate_subplots_kwarg( self, subplots: Union[bool, Sequence[Sequence[str]]] - ) -> Union[bool, List[Tuple[int]]]: + ) -> Union[bool, List[Tuple[int, ...]]]: """ Validate the subplots parameter From b75d86ce150470fdf0efce9c9f13d3b8dbd985ea Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 10 May 2021 13:32:47 +0100 Subject: [PATCH 29/37] merge again --- pandas/plotting/_matplotlib/core.py | 11 +++-------- pandas/tests/plotting/frame/test_frame.py | 3 +-- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 339e8855f9e2e..0df663298f0e4 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -7,11 +7,7 @@ from typing import ( TYPE_CHECKING, Hashable, - List, - Optional, Sequence, - Tuple, - Union, ) import warnings @@ -109,7 +105,7 @@ def __init__( data, kind=None, by=None, - subplots: Union[bool, Sequence[Sequence[str]]] = False, + subplots: bool | Sequence[Sequence[str]] = False, sharex=None, sharey=False, use_index=True, @@ -229,9 +225,8 @@ def __init__( self._validate_color_args() def _validate_subplots_kwarg( - self, subplots: Union[bool, Sequence[Sequence[str]]] - ) -> Union[bool, -[Tuple[int, ...]]]: + self, subplots: bool | Sequence[Sequence[str]] + ) -> bool | [tuple[int, ...]]: """ Validate the subplots parameter diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 7c9d1aa5e3677..c762018ad8f57 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -919,8 +919,7 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") - @td. - + @td.skip_if_no_scipy def test_kde_df(self): df = DataFrame(np.random.randn(100, 4)) ax = _check_plot_works(df.plot, kind="kde") From 55928ff136ac61c336facae22e25c126bd011e37 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 10 May 2021 22:29:24 +0100 Subject: [PATCH 30/37] mypy for the billionth time --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 0df663298f0e4..3cc0db3ad4f37 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -226,7 +226,7 @@ def __init__( def _validate_subplots_kwarg( self, subplots: bool | Sequence[Sequence[str]] - ) -> bool | [tuple[int, ...]]: + ) -> bool | list[tuple[int, ...]]: """ Validate the subplots parameter From 43070d3ad56ec311fde0331193ad5b9991a00a18 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 8 Jun 2021 09:35:19 +0100 Subject: [PATCH 31/37] add return type --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 3cc0db3ad4f37..f8e584a920963 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -835,7 +835,7 @@ def _get_ax_layer(cls, ax, primary=True): else: return getattr(ax, "right_ax", ax) - def _col_idx_to_axis_idx(self, col_idx: int): + def _col_idx_to_axis_idx(self, col_idx: int) -> int: """Return the index of the axis where the column at col_idx should be plotted""" if isinstance(self.subplots, list): # Subplots is a list: some columns will be grouped together in the same ax From e3c01468e2c2b43a7a88322468c0de912873b402 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 8 Jun 2021 10:49:54 +0100 Subject: [PATCH 32/37] add line --- pandas/plotting/_core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a51a3888a4324..9df60c1d90754 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -652,6 +652,7 @@ class PlotAccessor(PandasObject): ax : matplotlib axes object, default None An axes of the current figure. subplots : bool or list of iterables, default False + Whether to group columns into subplots: - ``False`` - no subplots will be used - ``True`` - Make separate subplots for each column. From a4c26762dc2e1607e1fef6e619ab2e94860564ad Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 8 Jun 2021 13:00:55 +0100 Subject: [PATCH 33/37] mypy for the billionth time + 1 --- pandas/plotting/_matplotlib/core.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f8e584a920963..0b47f27e3f8b1 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -839,9 +839,11 @@ def _col_idx_to_axis_idx(self, col_idx: int) -> int: """Return the index of the axis where the column at col_idx should be plotted""" if isinstance(self.subplots, list): # Subplots is a list: some columns will be grouped together in the same ax - for group_idx, group in enumerate(self.subplots): - if col_idx in group: - return group_idx + return next( + group_idx + for (group_idx, group) in enumerate(self.subplots) + if col_idx in group + ) else: # subplots is True: one ax per column return col_idx From 9b445467002d28af8a0d3fc9cb47111389608d32 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 17 Jan 2022 09:34:25 +0000 Subject: [PATCH 34/37] update whatnew --- doc/source/whatsnew/v1.3.0.rst | 1 - doc/source/whatsnew/v1.5.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c9e9fed080a92..ed66861efad93 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -259,7 +259,6 @@ Other enhancements - :meth:`DataFrame.applymap` can now accept kwargs to pass on to the user-provided ``func`` (:issue:`39987`) - Passing a :class:`DataFrame` indexer to ``iloc`` is now disallowed for :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` (:issue:`39004`) - :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. ``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`) -- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`). - :meth:`DataFrame.plot.scatter` can now accept a categorical column for the argument ``c`` (:issue:`12380`, :issue:`31357`) - :meth:`Series.loc` now raises a helpful error message when the Series has a :class:`MultiIndex` and the indexer has too many dimensions (:issue:`35349`) - :func:`read_stata` now supports reading data from compressed files (:issue:`26599`) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 2152b640e67f5..a3c85485309f3 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -32,6 +32,7 @@ Other enhancements ^^^^^^^^^^^^^^^^^^ - :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`) - Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`) +- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`). - .. --------------------------------------------------------------------------- From 8cd63426eec09a763c786be32cb75688eb4f36ee Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 29 Mar 2022 13:21:18 +0100 Subject: [PATCH 35/37] import Iterable from typing. --- pandas/plotting/_core.py | 2 +- pandas/plotting/_matplotlib/core.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index c34a3fb446b43..4387dcf91ae93 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -659,7 +659,7 @@ class PlotAccessor(PandasObject): with columns 'a' and 'c', and one with columns 'b' and 'd'. Remaining columns that aren't specified will be plotted in additional subplots (one per column). - .. versionadded:: 1.3.0 + .. versionadded:: 1.5.0 sharex : bool, default True if ax is None else False In case ``subplots=True``, share x axis and set some x axis labels diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f5d16c0c5504e..9ab56bf5ac430 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,12 +1,10 @@ from __future__ import annotations -from collections import ( - Counter, - Iterable, -) +from collections import Counter from typing import ( TYPE_CHECKING, Hashable, + Iterable, Sequence, ) import warnings From 48c9f32fa0d19e384de30db1721543d23b15032a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 3 Jun 2022 08:42:11 -0700 Subject: [PATCH 36/37] Process groups to indices in one pass --- pandas/plotting/_core.py | 16 +++--- pandas/plotting/_matplotlib/core.py | 65 ++++++++++++++--------- pandas/tests/plotting/frame/test_frame.py | 20 ++++++- 3 files changed, 66 insertions(+), 35 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index dc59f0b9a457d..929ddb52aea6d 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -649,16 +649,16 @@ class PlotAccessor(PandasObject): - 'hexbin' : hexbin plot (DataFrame only) ax : matplotlib axes object, default None An axes of the current figure. - subplots : bool or list of iterables, default False + subplots : bool or sequence of iterables, default False Whether to group columns into subplots: - - ``False`` - no subplots will be used - - ``True`` - Make separate subplots for each column. - - ``sequence of sequences of str`` - create a subplot for each group of columns. - For example `[('a', 'c'), ('b', 'd')]` will create 2 subplots: one - with columns 'a' and 'c', and one with columns 'b' and 'd'. - Remaining columns that aren't specified will be plotted in - additional subplots (one per column). + - ``False`` : No subplots will be used + - ``True`` : Make separate subplots for each column. + - sequence of iterables of column labels: Create a subplot for each + group of columns. For example `[('a', 'c'), ('b', 'd')]` will + create 2 subplots: one with columns 'a' and 'c', and one + with columns 'b' and 'd'. Remaining columns that aren't specified + will be plotted in additional subplots (one per column). .. versionadded:: 1.5.0 sharex : bool, default True if ax is None else False diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 965131b19dd26..0352d1adcbf1e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,6 +1,5 @@ from __future__ import annotations -from collections import Counter from typing import ( TYPE_CHECKING, Hashable, @@ -299,31 +298,22 @@ def _validate_subplots_kwarg( f"one of {', '.join(supported_kinds)}. Got {self._kind}." ) - if any( - not isinstance(group, Iterable) or isinstance(group, str) - for group in subplots - ): - raise ValueError( - "When subplots is an iterable, each entry " - "should be a list/tuple of column names." + if isinstance(self.data, ABCSeries): + raise NotImplementedError( + "An iterable subplots for a Series is not supported." ) - cols_in_groups = [col for group in subplots for col in group] - duplicates = {col for (col, cnt) in Counter(cols_in_groups).items() if cnt > 1} - if duplicates: - raise ValueError( - f"Each column should be in only one subplot. Columns {duplicates} " - "were found in multiple sublots." + columns = self.data.columns + if isinstance(columns, ABCMultiIndex): + raise NotImplementedError( + "An iterable subplots for a DataFrame with a MultiIndex column " + "is not supported." ) - cols_in_groups_set = set(cols_in_groups) - cols_remaining = set(self.data.columns) - cols_in_groups_set - bad_columns = cols_in_groups_set - set(self.data.columns) - - if bad_columns: - raise ValueError( - "Subplots contains the following column(s) " - f"which are invalid names: {bad_columns}" + if columns.nunique() != len(columns): + raise NotImplementedError( + "An iterable subplots for a DataFrame with non-unique column " + "labels is not supported." ) # subplots is a list of tuples where each tuple is a group of @@ -340,11 +330,34 @@ def _validate_subplots_kwarg( # TODO: also accept indices instead of just names? out = [] - index = list(self.data.columns).index + seen_columns = set() for group in subplots: - out.append(tuple(index(col) for col in group)) - for col in cols_remaining: - out.append((index(col),)) + if not is_list_like(group): + raise ValueError( + "When subplots is an iterable, each entry " + "should be a list/tuple of column names." + ) + idx_locs = columns.get_indexer_for(group) + if (idx_locs == -1).any(): + bad_labels = np.extract(idx_locs == -1, group) + raise ValueError( + f"Column label(s) {list(bad_labels)} not found in the DataFrame." + ) + else: + unique_columns = set(group) + duplicates = seen_columns.intersection(unique_columns) + if duplicates: + raise ValueError( + "Each column should be in only one subplot. " + f"Columns {duplicates} were found in multiple subplots." + ) + seen_columns = seen_columns.union(unique_columns) + out.append(tuple(idx_locs)) + + unseen_columns = columns.difference(seen_columns) + for column in unseen_columns: + idx_loc = columns.get_loc(column) + out.append((idx_loc,)) return out def _validate_color_args(self): diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index ad5ec2a170ecc..3ec3744e43653 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -2095,6 +2095,24 @@ def test_group_subplot(self, kind): if kind == "line": assert len(ax.lines) == len(labels) + def test_group_subplot_series_notimplemented(self): + ser = Series(range(1)) + msg = "An iterable subplots for a Series" + with pytest.raises(NotImplementedError, match=msg): + ser.plot(subplots=[("a",)]) + + def test_group_subplot_multiindex_notimplemented(self): + df = DataFrame(np.eye(2), columns=MultiIndex.from_tuples([(0, 1), (1, 2)])) + msg = "An iterable subplots for a DataFrame with a MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + df.plot(subplots=[(0, 1)]) + + def test_group_subplot_nonunique_cols_notimplemented(self): + df = DataFrame(np.eye(2), columns=["a", "a"]) + msg = "An iterable subplots for a DataFrame with non-unique" + with pytest.raises(NotImplementedError, match=msg): + df.plot(subplots=[("a",)]) + @pytest.mark.parametrize( "subplots, expected_msg", [ @@ -2118,7 +2136,7 @@ def test_group_subplot_invalid_column_name(self): d = {"a": np.arange(10), "b": np.arange(10)} df = DataFrame(d) - with pytest.raises(ValueError, match="invalid names: {'bad_name'}"): + with pytest.raises(ValueError, match=r"Column label\(s\) \['bad_name'\]"): df.plot(subplots=[("a", "bad_name")]) def test_group_subplot_duplicated_column(self): From 69efd1874d02f6eea5a56dd8958bb843920834c7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 3 Jun 2022 12:12:57 -0700 Subject: [PATCH 37/37] Fix typing check --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 0352d1adcbf1e..5fceb14b9d1cc 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -330,7 +330,7 @@ def _validate_subplots_kwarg( # TODO: also accept indices instead of just names? out = [] - seen_columns = set() + seen_columns: set[Hashable] = set() for group in subplots: if not is_list_like(group): raise ValueError(