From cb29cffe39e454000f321621d85bc1daaa4cb52e Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Sat, 28 Mar 2020 17:59:20 -0400 Subject: [PATCH 01/12] added a test for scatter with variable marker size --- pandas/tests/plotting/test_frame.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 45ac18b2661c3..c64e6532bd361 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1306,6 +1306,16 @@ def test_plot_scatter_with_c(self): float_array = np.array([0.0, 1.0]) df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") + @pytest.mark.slow + def test_plot_scatter_with_s(self): + # this refers to GH 32904 + df = DataFrame( + np.random.random((10,3))*100, + columns=['a', 'b', 'c'], + ) + + _check_plot_works(df.plot.scatter(x='a', y='b', s='c')) + def test_scatter_colors(self): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) with pytest.raises(TypeError): From f5309c89b11629c299e0a50b0749792513ecdd8d Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Sat, 28 Mar 2020 18:08:41 -0400 Subject: [PATCH 02/12] modified to pass black pandas --- pandas/tests/plotting/test_frame.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index c64e6532bd361..3d980761f2a83 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1309,12 +1309,9 @@ def test_plot_scatter_with_c(self): @pytest.mark.slow def test_plot_scatter_with_s(self): # this refers to GH 32904 - df = DataFrame( - np.random.random((10,3))*100, - columns=['a', 'b', 'c'], - ) + df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"],) - _check_plot_works(df.plot.scatter(x='a', y='b', s='c')) + _check_plot_works(df.plot.scatter(x="a", y="b", s="c")) def test_scatter_colors(self): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) From ab17f63b7afa2dd60c9963a7708ef62b4f7208a9 Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Sat, 28 Mar 2020 18:14:26 -0400 Subject: [PATCH 03/12] check if the size variable is a column --- pandas/plotting/_matplotlib/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 63d0b8abe59d9..bc8346fd48433 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -934,6 +934,8 @@ def __init__(self, data, x, y, s=None, c=None, **kwargs): # hide the matplotlib default for size, in case we want to change # the handling of this argument later s = 20 + elif is_hashable(s) and s in data.columns: + s = data[s] super().__init__(data, x, y, s=s, **kwargs) if is_integer(c) and not self.data.columns.holds_integer(): c = self.data.columns[c] From 02c1890708744b2dc1ffb4f980658ff1bda29f5d Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Sat, 28 Mar 2020 19:03:00 -0400 Subject: [PATCH 04/12] fixed incorrect use of _check_plot_works in a test --- pandas/tests/plotting/test_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 3d980761f2a83..e6790e2985aff 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1311,7 +1311,7 @@ def test_plot_scatter_with_s(self): # this refers to GH 32904 df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"],) - _check_plot_works(df.plot.scatter(x="a", y="b", s="c")) + _check_plot_works(df.plot.scatter, x="a", y="b", s="c") def test_scatter_colors(self): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) From abb3d304c503a5be04420cfcea0d9631c94cc30a Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Sat, 28 Mar 2020 19:06:15 -0400 Subject: [PATCH 05/12] added an entry to whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 20415bba99476..aeb0eeab0b3ba 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -452,6 +452,7 @@ Other - Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`) - Bug in :meth:`Series.map` not raising on invalid ``na_action`` (:issue:`32815`) - Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`) +- Bug in :meth:`DataFrame.plot.scatter` caused an error when plotting variable marker sizes (:issue:`32904`) .. --------------------------------------------------------------------------- From 4340a4159835fce961363e9e5f51f514ca6ba29d Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Sun, 29 Mar 2020 11:10:20 -0400 Subject: [PATCH 06/12] removed pytest.mark.slow --- pandas/tests/plotting/test_frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index e6790e2985aff..72fd718623b59 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1306,7 +1306,6 @@ def test_plot_scatter_with_c(self): float_array = np.array([0.0, 1.0]) df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") - @pytest.mark.slow def test_plot_scatter_with_s(self): # this refers to GH 32904 df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"],) From 7c092143694ce4e62f7467f3142b7bea94f92eff Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Tue, 31 Mar 2020 16:10:19 -0400 Subject: [PATCH 07/12] added docstring to plot.scatter to indicate that column label is also accepted --- pandas/plotting/_core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index d3db539084609..899532553f06a 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1468,9 +1468,11 @@ def scatter(self, x, y, s=None, c=None, **kwargs): y : int or str The column name or column position to be used as vertical coordinates for each point. - s : scalar or array_like, optional + s : str, scalar or array_like, optional The size of each point. Possible values are: + - A string with the name of the column to use for marker's size. + - A single scalar so all points have the same size. - A sequence of scalars, which will be used for each point's size From 29b0d6c1a7b99c329f6167b5e20097a4fc3e18a0 Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Tue, 31 Mar 2020 16:15:21 -0400 Subject: [PATCH 08/12] added versionchanged note --- pandas/plotting/_core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 899532553f06a..4126685a2af95 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1471,7 +1471,9 @@ def scatter(self, x, y, s=None, c=None, **kwargs): s : str, scalar or array_like, optional The size of each point. Possible values are: - - A string with the name of the column to use for marker's size. + - A string with the name of the column to be used for marker's size. + .. versionchanged:: 1.1.0 + plot.scatter acccepts a string with the name of a column. - A single scalar so all points have the same size. From 8a94f08cae701d7d9afcd7238e7b2f7f2dfb55c5 Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Tue, 31 Mar 2020 16:23:45 -0400 Subject: [PATCH 09/12] explicitly verify that marker sizes correspond to the desired values --- pandas/tests/plotting/test_frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 72fd718623b59..db002cfe2130a 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1310,7 +1310,8 @@ def test_plot_scatter_with_s(self): # this refers to GH 32904 df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"],) - _check_plot_works(df.plot.scatter, x="a", y="b", s="c") + ax = df.plot.scatter(x="a", y="b", s="c") + assert (df["c"].values == ax.collections[0].get_sizes()).all() def test_scatter_colors(self): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) From 4d5db4ad9b30bb51e9c9bbe130ceff4dfb989320 Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Tue, 31 Mar 2020 16:27:11 -0400 Subject: [PATCH 10/12] corrected the versionchanged directive --- pandas/plotting/_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 4126685a2af95..765952ad8ef9e 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1471,9 +1471,9 @@ def scatter(self, x, y, s=None, c=None, **kwargs): s : str, scalar or array_like, optional The size of each point. Possible values are: - - A string with the name of the column to be used for marker's size. .. versionchanged:: 1.1.0 - plot.scatter acccepts a string with the name of a column. + + - A string with the name of the column to be used for marker's size. - A single scalar so all points have the same size. From 68969616560f47edde72960db432fd8b6ddd84b0 Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Tue, 31 Mar 2020 16:56:23 -0400 Subject: [PATCH 11/12] modified assertion to use tm --- pandas/tests/plotting/test_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index db002cfe2130a..08b33ee547a48 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1311,7 +1311,7 @@ def test_plot_scatter_with_s(self): df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"],) ax = df.plot.scatter(x="a", y="b", s="c") - assert (df["c"].values == ax.collections[0].get_sizes()).all() + tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) def test_scatter_colors(self): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) From a24e573266e06771950131edf3a7b59707e31e66 Mon Sep 17 00:00:00 2001 From: Sultan Orazbayev Date: Tue, 31 Mar 2020 16:58:02 -0400 Subject: [PATCH 12/12] changed position of versionchanged --- pandas/plotting/_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 765952ad8ef9e..e466a215091ea 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1471,8 +1471,6 @@ def scatter(self, x, y, s=None, c=None, **kwargs): s : str, scalar or array_like, optional The size of each point. Possible values are: - .. versionchanged:: 1.1.0 - - A string with the name of the column to be used for marker's size. - A single scalar so all points have the same size. @@ -1481,6 +1479,8 @@ def scatter(self, x, y, s=None, c=None, **kwargs): recursively. For instance, when passing [2,14] all points size will be either 2 or 14, alternatively. + .. versionchanged:: 1.1.0 + c : str, int or array_like, optional The color of each point. Possible values are: