From c05b255ab44bb94ce593f75ee668b43246a3c361 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 23 May 2020 18:09:15 +0100 Subject: [PATCH 1/5] don't plot colorbar if c is column containing colors --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/plotting/_matplotlib/core.py | 13 ++++++++++--- pandas/tests/plotting/test_frame.py | 9 +++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 41d519e0765dc..bdd0f1b886350 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -803,7 +803,7 @@ Plotting - Bug in :meth:`DataFrame.boxplot` and :meth:`DataFrame.plot.boxplot` lost color attributes of ``medianprops``, ``whiskerprops``, ``capprops`` and ``medianprops`` (:issue:`30346`) - Bug in :meth:`DataFrame.hist` where the order of ``column`` argument was ignored (:issue:`29235`) - Bug in :meth:`DataFrame.plot.scatter` that when adding multiple plots with different ``cmap``, colorbars alway use the first ``cmap`` (:issue:`33389`) - +- Bug in :meth:`DataFrame.plot.scatter` was adding a colorbar to the plot even if the argument `c` was assigned to a column containing color names (:issue:`34316`) Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a049ac99f0e08..ab894ae19fc55 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -951,9 +951,6 @@ def _make_plot(self): c_is_column = is_hashable(c) and c in self.data.columns - # plot a colorbar only if a colormap is provided or necessary - cb = self.kwds.pop("colorbar", self.colormap or c_is_column) - # pandas uses colormap, matplotlib uses cmap. 
cmap = self.colormap or "Greys" cmap = self.plt.cm.get_cmap(cmap) @@ -969,6 +966,16 @@ def _make_plot(self): else: c_values = c + # plot a colorbar only if a colormap is provided or necessary + from matplotlib.colors import is_color_like + + c_is_column_not_containing_colors = c_is_column and not all( + np.vectorize(is_color_like)(c_values) + ) + cb = self.kwds.pop( + "colorbar", self.colormap or c_is_column_not_containing_colors + ) + if self.legend and hasattr(self, "label"): label = self.label else: diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index c84a09f21f46b..c88115cac6185 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1306,6 +1306,15 @@ def test_plot_scatter_with_c(self): float_array = np.array([0.0, 1.0]) df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") + def test_scatter_with_c_column_name_with_colors(self): + df = pd.DataFrame( + [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], + columns=["length", "width"], + ) + df["species"] = ["r", "r", "g", "g", "b"] + ax = df.plot.scatter(x=0, y=1, c="species",) + assert ax.collections[0].colorbar is None + def test_plot_scatter_with_s(self): # this refers to GH 32904 df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"],) From 6562795c40cf76c50dd95d4761562693bad67370 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 25 May 2020 09:26:26 +0100 Subject: [PATCH 2/5] check for numeric rather than is_color --- pandas/plotting/_matplotlib/core.py | 9 +++------ pandas/tests/plotting/test_frame.py | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index ab894ae19fc55..52a5c17df802b 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -13,6 +13,7 @@ is_iterator, is_list_like, is_number, + is_numeric_dtype, ) from pandas.core.dtypes.generic import ( 
ABCDataFrame, @@ -966,12 +967,8 @@ def _make_plot(self): else: c_values = c - # plot a colorbar only if a colormap is provided or necessary - from matplotlib.colors import is_color_like - - c_is_column_not_containing_colors = c_is_column and not all( - np.vectorize(is_color_like)(c_values) - ) + # don't plot a colorbar if `c` is a column containing color names + c_is_column_not_containing_colors = c_is_column and is_numeric_dtype(c_values) cb = self.kwds.pop( "colorbar", self.colormap or c_is_column_not_containing_colors ) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index c88115cac6185..2b378a56997e0 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1307,6 +1307,7 @@ def test_plot_scatter_with_c(self): df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") def test_scatter_with_c_column_name_with_colors(self): + # https://github.com/pandas-dev/pandas/issues/34316 df = pd.DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], From e70c6509fcccedcac6dfb81f0e96ef6c94c61f2a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 25 May 2020 09:27:32 +0100 Subject: [PATCH 3/5] fit on one line --- pandas/plotting/_matplotlib/core.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 52a5c17df802b..1a8fc19f99aa7 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -968,10 +968,8 @@ def _make_plot(self): c_values = c # don't plot a colorbar if `c` is a column containing color names - c_is_column_not_containing_colors = c_is_column and is_numeric_dtype(c_values) - cb = self.kwds.pop( - "colorbar", self.colormap or c_is_column_not_containing_colors - ) + c_is_col_not_containing_colors = c_is_column and is_numeric_dtype(c_values) + cb = self.kwds.pop("colorbar", self.colormap or 
c_is_col_not_containing_colors) if self.legend and hasattr(self, "label"): label = self.label From 0f2c00a228d638a9d7edb8b94977785e70cd0f47 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 25 May 2020 09:31:21 +0100 Subject: [PATCH 4/5] rename --- pandas/plotting/_matplotlib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1a8fc19f99aa7..a52e37e084fe9 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -968,8 +968,8 @@ def _make_plot(self): c_values = c # don't plot a colorbar if `c` is a column containing color names - c_is_col_not_containing_colors = c_is_column and is_numeric_dtype(c_values) - cb = self.kwds.pop("colorbar", self.colormap or c_is_col_not_containing_colors) + c_is_numeric_col = c_is_column and is_numeric_dtype(c_values) + cb = self.kwds.pop("colorbar", self.colormap or c_is_numeric_col) if self.legend and hasattr(self, "label"): label = self.label From 4a60600dc83e25b517d3b073a4acfcf9d32354e5 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 30 May 2020 18:45:21 +0100 Subject: [PATCH 5/5] take care of case when user passes cmap but c isn't numeric column --- pandas/plotting/_matplotlib/core.py | 8 +++++--- pandas/tests/plotting/test_frame.py | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a52e37e084fe9..622d9e18a4434 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -967,9 +967,11 @@ def _make_plot(self): else: c_values = c - # don't plot a colorbar if `c` is a column containing color names - c_is_numeric_col = c_is_column and is_numeric_dtype(c_values) - cb = self.kwds.pop("colorbar", self.colormap or c_is_numeric_col) + # plot colorbar if + # 1. 
colormap is assigned, and + # 2.`c` is a column containing only numeric values + plot_colorbar = self.colormap or c_is_column + cb = self.kwds.pop("colorbar", is_numeric_dtype(c_values) and plot_colorbar) if self.legend and hasattr(self, "label"): label = self.label diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 2b378a56997e0..8992e27a78d6b 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1306,14 +1306,15 @@ def test_plot_scatter_with_c(self): float_array = np.array([0.0, 1.0]) df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") - def test_scatter_with_c_column_name_with_colors(self): + @pytest.mark.parametrize("cmap", [None, "Greys"]) + def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 df = pd.DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) df["species"] = ["r", "r", "g", "g", "b"] - ax = df.plot.scatter(x=0, y=1, c="species",) + ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) assert ax.collections[0].colorbar is None def test_plot_scatter_with_s(self):