From 38a639abde3779be98444f219faed2f2cec6160d Mon Sep 17 00:00:00 2001 From: brendandrury <72849852+brendandrury@users.noreply.github.com> Date: Tue, 12 Oct 2021 14:44:51 -0700 Subject: [PATCH 1/4] Call to_numeric before get_numeric_data This converts numeric objects to numeric types before discarding non-numeric types. --- pandas/plotting/_matplotlib/boxplot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 1308a83f61443..6762399ae1a89 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -390,6 +390,7 @@ def plot_group(keys, values, ax: Axes): rc = {"figure.figsize": figsize} if figsize is not None else {} with plt.rc_context(rc): ax = plt.gca() + data = data.apply(pd.to_numeric, errors='ignore') data = data._get_numeric_data() if columns is None: columns = data.columns From 55a58bb1069b57cfb115a4db8f2661b7d8b67488 Mon Sep 17 00:00:00 2001 From: brendandrury <72849852+brendandrury@users.noreply.github.com> Date: Tue, 12 Oct 2021 14:50:08 -0700 Subject: [PATCH 2/4] Add test for groupby with object --- pandas/tests/plotting/test_boxplot_method.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index dbceeae44a493..45f345c5a251d 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -542,6 +542,14 @@ def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] assert expected_xticklabel == result_xticklabel + + def test_groupby_boxplot_object(self): + # GH 43480 + df = self.hist_df.astype("object") + grouped = df.groupby("gender") + _check_plot_works( + grouped.boxplot, subplots=False + ) def test_boxplot_multiindex_column(self): # GH 16748 From a0ebc6e44670c810bdd20f4c91a10714d7a5bb2e Mon Sep 17 
00:00:00 2001 From: Brendan Drury Date: Tue, 12 Oct 2021 15:16:55 -0700 Subject: [PATCH 3/4] Linting --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/plotting/_matplotlib/boxplot.py | 2 +- pandas/tests/plotting/test_boxplot_method.py | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index a3dc95781ee93..24e3890448d95 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -485,7 +485,7 @@ Period Plotting ^^^^^^^^ -- +- Fixed bug in :meth:`boxplot._grouped_plot_by_column` where trying to plot data of dtype ``object`` caused plotting to fail even if some data was numeric - Groupby/resample/rolling diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 6762399ae1a89..9b2906f07ab3b 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -390,7 +390,7 @@ def plot_group(keys, values, ax: Axes): rc = {"figure.figsize": figsize} if figsize is not None else {} with plt.rc_context(rc): ax = plt.gca() - data = data.apply(pd.to_numeric, errors='ignore') + data = data.apply(pd.to_numeric, errors="ignore") data = data._get_numeric_data() if columns is None: columns = data.columns diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 45f345c5a251d..834f4a4c03727 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -542,14 +542,12 @@ def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] assert expected_xticklabel == result_xticklabel - + def test_groupby_boxplot_object(self): # GH 43480 df = self.hist_df.astype("object") grouped = df.groupby("gender") - _check_plot_works( - grouped.boxplot, subplots=False - ) + _check_plot_works(grouped.boxplot, subplots=False) def 
test_boxplot_multiindex_column(self): # GH 16748 From d554935c3dcb3e4101ada30c3975ff4b2a6e7364 Mon Sep 17 00:00:00 2001 From: Brendan Drury Date: Mon, 1 Nov 2021 12:42:17 -0700 Subject: [PATCH 4/4] Raise ValueError instead of coercing non-numeric data to numeric types --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/plotting/_matplotlib/boxplot.py | 6 +++++- pandas/tests/plotting/test_boxplot_method.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 24e3890448d95..0cccfa8a173a0 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -485,7 +485,7 @@ Period Plotting ^^^^^^^^ -- Fixed bug in :meth:`boxplot._grouped_plot_by_column` where trying to plot data of dtype ``object`` caused plotting to fail even if some data was numeric +- When given non-numeric data, :meth:`DataFrame.boxplot` now raises a ``ValueError`` rather than a cryptic ``KeyError`` or ``ZeroDivisionError``, in line with other plotting functions like :meth:`DataFrame.hist`. (:issue:`43480`) - Groupby/resample/rolling diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 9b2906f07ab3b..a2089de294e22 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -390,8 +390,12 @@ def plot_group(keys, values, ax: Axes): rc = {"figure.figsize": figsize} if figsize is not None else {} with plt.rc_context(rc): ax = plt.gca() - data = data.apply(pd.to_numeric, errors="ignore") data = data._get_numeric_data() + naxes = len(data.columns) + if naxes == 0: + raise ValueError( + "boxplot method requires numerical columns, nothing to plot."
+ ) if columns is None: columns = data.columns else: diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 834f4a4c03727..ce32e5801e461 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -547,7 +547,9 @@ def test_groupby_boxplot_object(self): # GH 43480 df = self.hist_df.astype("object") grouped = df.groupby("gender") - _check_plot_works(grouped.boxplot, subplots=False) + msg = "boxplot method requires numerical columns, nothing to plot" + with pytest.raises(ValueError, match=msg): + _check_plot_works(grouped.boxplot, subplots=False) def test_boxplot_multiindex_column(self): # GH 16748