From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/11] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From cb97a5826a065e5c2423430f63e09b6822dea56a Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 9 Aug 2019 21:17:46 +0200 Subject: [PATCH 02/11] Fix issue 22799 --- pandas/plotting/_matplotlib/core.py | 4 +++- pandas/tests/plotting/test_boxplot_method.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 519465802085b..70eae0d3255ca 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -400,8 +400,10 @@ def _compute_plot_data(self): # GH16953, _convert is needed as fallback, for ``Series`` # with ``dtype == object`` data = data._convert(datetime=True, timedelta=True) + + # GH22799, skip datetime type data for computation numeric_data = data.select_dtypes( - include=[np.number, "datetime", "datetimetz", "timedelta"] + include=[np.number, "datetimetz", "timedelta"] ) try: diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index cab0efe53f1fc..127bf2c159b3d 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -10,6 +10,7 @@ import pandas.util._test_decorators as td from pandas import DataFrame, MultiIndex, Series +from pandas import date_range from pandas.tests.plotting.common import TestPlotBase, _check_plot_works import pandas.util.testing as tm @@ -160,6 +161,19 @@ def test_fontsize(self): df.boxplot("a", fontsize=16), xlabelsize=16, ylabelsize=16 ) + def test_boxplot_numeric_data(self): + # GH 22799 + df = DataFrame( + { + "a": date_range("2012-01-01", periods=100), + "b": np.random.randn(100), + "c": np.random.randn(100) + 2, + "d": date_range("2012-01-01", periods=100).astype(str), + } + ) + ax = df.plot(kind="box") + assert [x.get_text() for x in ax.get_xticklabels()] == ["b", "c"] + @td.skip_if_no_mpl class TestDataFrameGroupByPlots(TestPlotBase): From 737bb5b118040fbd40b42669c4fcb9b1bba6abc5 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 9 Aug 2019 21:21:22 +0200 Subject: [PATCH 03/11] Add whatsnew note --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index cc4bab8b9a923..c592bb4fa7884 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -160,7 +160,7 @@ Plotting - - - +- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datatime column (:issue:`22799`) Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ From e4467044b556a8e419329179e61557f44c48e10b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 9 Aug 2019 22:32:36 +0200 Subject: [PATCH 04/11] Fix bug --- pandas/plotting/_matplotlib/core.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 70eae0d3255ca..2477f2a8d0000 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -401,9 +401,14 @@ def _compute_plot_data(self): # with ``dtype == object`` data = data._convert(datetime=True, timedelta=True) + # exclude datatime type for boxplot + include_type = [np.number, "datetime", "datetimetz", "timedelta"] + if self._kind == 'box': + include_type.remove('datetime') + # GH22799, skip datetime type data for computation numeric_data = data.select_dtypes( - include=[np.number, "datetimetz", "timedelta"] + include=include_type, ) try: From 5e168a1b43910d9af570f7403f3e4f97cb19d59f Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 9 Aug 2019 22:33:10 +0200 Subject: [PATCH 05/11] Reformat by black --- pandas/plotting/_matplotlib/core.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 2477f2a8d0000..45eb3b3ac3b60 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -403,13 +403,11 @@ def _compute_plot_data(self): # exclude datatime type for boxplot include_type = [np.number, "datetime", "datetimetz", "timedelta"] - if self._kind == 'box': - include_type.remove('datetime') + if self._kind == "box": + include_type.remove("datetime") # GH22799, skip datetime type data for computation - numeric_data = data.select_dtypes( - include=include_type, - ) + numeric_data = data.select_dtypes(include=include_type) try: is_empty = numeric_data.empty From 7ebdac86d5f101a1b56e001c537e30ff2225f22e Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 9 Aug 2019 22:34:36 +0200 Subject: [PATCH 06/11] Add new line for whatsnew --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index c592bb4fa7884..064f552a9dcb3 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -161,6 +161,7 @@ Plotting - - - Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datatime column (:issue:`22799`) + Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ From 3e942c32cdbfba0e19f2793731ecbdda346c7048 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 9 Aug 2019 22:36:09 +0200 Subject: [PATCH 07/11] Add better comment --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 45eb3b3ac3b60..cf1e9f695935b 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -401,7 +401,7 @@ def _compute_plot_data(self): # with ``dtype == object`` data = data._convert(datetime=True, timedelta=True) - # exclude datatime type for boxplot + # GH22799, exclude datatime type for boxplot include_type = [np.number, "datetime", "datetimetz", "timedelta"] if self._kind == "box": include_type.remove("datetime") From a465bb171b0f24ebbdfdf1a6475c7e7b89256a35 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 9 Aug 2019 23:10:05 +0200 Subject: [PATCH 08/11] Fix import order --- pandas/tests/plotting/test_boxplot_method.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 127bf2c159b3d..9d271ef636152 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -9,8 +9,7 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, MultiIndex, Series -from pandas import date_range +from pandas import DataFrame, MultiIndex, Series, date_range from pandas.tests.plotting.common import TestPlotBase, _check_plot_works import pandas.util.testing as tm From 597ec6ca2b7150c87bfee98085411f83a1098d22 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 12 Aug 2019 21:30:05 +0200 Subject: [PATCH 09/11] Code change based on review --- pandas/plotting/_matplotlib/core.py | 6 +++--- pandas/tests/plotting/test_boxplot_method.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 9c1c100f04d91..9ea3cdf675608 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -400,11 +400,11 @@ def _compute_plot_data(self): # GH16953, _convert is needed as fallback, for ``Series`` # with ``dtype == object`` data = data._convert(datetime=True, timedelta=True) - - # GH22799, exclude datatime type for boxplot + # GH22799, exclude datatime-like type for boxplot include_type = [np.number, "datetime", "datetimetz", "timedelta"] if self._kind == "box": - include_type.remove("datetime") + # TODO: might be buggy: timedelta will be counted as number? + include_type = [np.number] # GH22799, skip datetime type data for computation numeric_data = data.select_dtypes(include=include_type) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 9d271ef636152..98b0848f68137 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -168,6 +168,7 @@ def test_boxplot_numeric_data(self): "b": np.random.randn(100), "c": np.random.randn(100) + 2, "d": date_range("2012-01-01", periods=100).astype(str), + 'e': date_range("2012-01-01", periods=100, tz='UTC') } ) ax = df.plot(kind="box") From c3332355ca263208f5b45e8ad71e1bb4ae5f23b6 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 12 Aug 2019 22:46:39 +0200 Subject: [PATCH 10/11] Fix linting --- pandas/plotting/_matplotlib/core.py | 6 ++++-- pandas/tests/plotting/test_boxplot_method.py | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a34e4b802ee1d..a262f89dcc79c 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -409,11 +409,13 @@ def _compute_plot_data(self): include_type.append(np.bool_) # GH22799, exclude datatime-like type for boxplot + exclude_type = None if self._kind == "box": - # TODO: might be buggy: timedelta will be counted as number? + # TODO: change after solving issue 27881 include_type = [np.number] + exclude_type = ["timedelta"] - numeric_data = data.select_dtypes(include=include_type) + numeric_data = data.select_dtypes(include=include_type, exclude=exclude_type) try: is_empty = numeric_data.empty diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 98b0848f68137..5bbaff580c356 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -9,7 +9,7 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, MultiIndex, Series, date_range +from pandas import DataFrame, MultiIndex, Series, date_range, timedelta_range from pandas.tests.plotting.common import TestPlotBase, _check_plot_works import pandas.util.testing as tm @@ -168,7 +168,8 @@ def test_boxplot_numeric_data(self): "b": np.random.randn(100), "c": np.random.randn(100) + 2, "d": date_range("2012-01-01", periods=100).astype(str), - 'e': date_range("2012-01-01", periods=100, tz='UTC') + "e": date_range("2012-01-01", periods=100, tz="UTC"), + "f": timedelta_range("1 days", periods=100), } ) ax = df.plot(kind="box") From a01b3ea15eced6bbf4c3cc1c58a5f8c9985b5b5b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 12 Aug 2019 22:57:04 +0200 Subject: [PATCH 11/11] Update whatsnew note --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2da87ddd8ca1d..b35f230100f8d 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -165,7 +165,7 @@ Plotting - Bug in :meth:`Series.plot` not able to plot boolean values (:issue:`23719`) - -- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datatime column (:issue:`22799`) +- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`) Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^