From e8989e602b4d885064b6dad024b95d6503a16021 Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Fri, 11 Sep 2020 18:00:45 +0800 Subject: [PATCH 01/15] ENH: support of pandas.DataFrame.hist for datetime data --- pandas/core/generic.py | 3 +++ pandas/core/internals/managers.py | 9 +++++++++ pandas/plotting/_matplotlib/hist.py | 3 ++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fffd2e068ebcf..df77daf9b6270 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5438,6 +5438,9 @@ def _get_numeric_data(self): def _get_bool_data(self): return self._constructor(self._mgr.get_bool_data()).__finalize__(self) + def _get_numeric_or_datetime_data(self): + return self._constructor(self._mgr.get_numeric_or_datetime_data()).__finalize__(self) + # ---------------------------------------------------------------------- # Internal Interface Methods diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3f446874ffd0e..e6416f94fdc72 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -693,6 +693,15 @@ def get_numeric_data(self, copy: bool = False) -> "BlockManager": """ return self._combine([b for b in self.blocks if b.is_numeric], copy) + def get_numeric_or_datetime_data(self, copy: bool = False) -> "BlockManager": + """ + Parameters + ---------- + copy : bool, default False + Whether to copy the blocks + """ + return self._combine([b for b in self.blocks if b.is_numeric or b.is_datetime], copy) + def _combine(self: T, blocks: List[Block], copy: bool = True) -> T: """ return a new manager with the blocks """ if len(blocks) == 0: diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 89035552d4309..5ad38c72f524c 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -417,7 +417,8 @@ def hist_frame( if not isinstance(column, (list, np.ndarray, ABCIndexClass)): column = [column] data = data[column] - data = data._get_numeric_data() + # GH32590 + data = data._get_numeric_or_datetime_data() naxes = len(data.columns) if naxes == 0: From ed86887caf9bfd58a89df98fcdacdd8aae81f9ad Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Sun, 13 Sep 2020 13:08:58 +0800 Subject: [PATCH 02/15] Revert "ENH: support of pandas.DataFrame.hist for datetime data" This reverts commit e8989e602b4d885064b6dad024b95d6503a16021. --- pandas/core/generic.py | 3 --- pandas/core/internals/managers.py | 9 --------- pandas/plotting/_matplotlib/hist.py | 3 +-- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index df77daf9b6270..fffd2e068ebcf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5438,9 +5438,6 @@ def _get_numeric_data(self): def _get_bool_data(self): return self._constructor(self._mgr.get_bool_data()).__finalize__(self) - def _get_numeric_or_datetime_data(self): - return self._constructor(self._mgr.get_numeric_or_datetime_data()).__finalize__(self) - # ---------------------------------------------------------------------- # Internal Interface Methods diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e6416f94fdc72..3f446874ffd0e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -693,15 +693,6 @@ def get_numeric_data(self, copy: bool = False) -> "BlockManager": """ return self._combine([b for b in self.blocks if b.is_numeric], copy) - def get_numeric_or_datetime_data(self, copy: bool = False) -> "BlockManager": - """ - Parameters - ---------- - copy : bool, default False - Whether to copy the blocks - """ - return self._combine([b for b in self.blocks if b.is_numeric or b.is_datetime], copy) - def _combine(self: T, blocks: List[Block], copy: bool = True) -> T: """ return a new manager with the blocks """ if len(blocks) == 0: diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 5ad38c72f524c..89035552d4309 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -417,8 +417,7 @@ def hist_frame( if not isinstance(column, (list, np.ndarray, ABCIndexClass)): column = [column] data = data[column] - # GH32590 - data = data._get_numeric_or_datetime_data() + data = data._get_numeric_data() naxes = len(data.columns) if naxes == 0: From 9dc96cf97ad9abf0f634107460821034be6fe188 Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Sun, 13 Sep 2020 18:01:06 +0800 Subject: [PATCH 03/15] ENH: support of pandas.DataFrame.hist for datetime data --- pandas/plotting/_matplotlib/hist.py | 15 +++++++++++++-- pandas/tests/plotting/test_hist_method.py | 21 ++++++++++++++++----- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 89035552d4309..c75307ea5dd4c 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -5,6 +5,7 @@ from pandas.core.dtypes.common import is_integer, is_list_like from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass from pandas.core.dtypes.missing import isna, remove_na_arraylike +from pandas.core.reshape.concat import concat from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.core import LinePlot, MPLPlot @@ -417,11 +418,21 @@ def hist_frame( if not isinstance(column, (list, np.ndarray, ABCIndexClass)): column = [column] data = data[column] - data = data._get_numeric_data() + # GH32590 + columns_copy = data.columns + numeric_data = data._get_numeric_data() + datetime_data = data.select_dtypes(include="datetime64[ns]") + data = concat([numeric_data, datetime_data], axis=1) naxes = len(data.columns) if naxes == 0: - raise ValueError("hist method requires numerical columns, nothing to plot.") + raise ValueError( + "hist method requires numerical or datetime columns, nothing to plot." + ) + else: + data = data.reindex( + columns=[name for name in columns_copy if name in data.columns] + ) fig, axes = create_subplots( naxes=naxes, diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 34c881855d16a..ab1c5e2519e79 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -6,7 +6,7 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Index, Series +from pandas import DataFrame, Index, Series, to_datetime import pandas._testing as tm from pandas.tests.plotting.common import TestPlotBase, _check_plot_works @@ -225,12 +225,23 @@ def test_hist_df_legacy(self): ser.hist(foo="bar") @pytest.mark.slow - def test_hist_non_numerical_raises(self): - # gh-10444 - df = DataFrame(np.random.rand(10, 2)) + def test_hist_non_numerical_or_datetime_raises(self): + # gh-10444, GH32590 + df = DataFrame( + { + "a": np.random.rand(10), + "b": np.random.rand(10), + "c": to_datetime( + np.random.randint(1582800000000000000, 1583500000000000000, 10) + ), + "d": to_datetime( + np.random.randint(1582800000000000000, 1583500000000000000, 10) + ), + } + ) df_o = df.astype(object) - msg = "hist method requires numerical columns, nothing to plot." + msg = "hist method requires numerical or datetime columns, nothing to plot." with pytest.raises(ValueError, match=msg): df_o.hist() From 602c59b6fb7200641ee6e747ecfa1c19b58adbf9 Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Sun, 13 Sep 2020 19:32:51 +0800 Subject: [PATCH 04/15] REF: reformat imports --- pandas/plotting/_matplotlib/hist.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index c75307ea5dd4c..00d209314bf91 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -5,6 +5,7 @@ from pandas.core.dtypes.common import is_integer, is_list_like from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass from pandas.core.dtypes.missing import isna, remove_na_arraylike + from pandas.core.reshape.concat import concat from pandas.io.formats.printing import pprint_thing From 17c182f2fe08433611651b1e44ca3d31a96d71d2 Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Mon, 14 Sep 2020 18:01:43 +0800 Subject: [PATCH 05/15] UPD: changed as requested --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/plotting/_matplotlib/hist.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ba556c8dcca54..d8430cf7f1de9 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -104,6 +104,7 @@ Other enhancements - Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) - :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) +- :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`) - .. _whatsnew_120.api_breaking.python: diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 00d209314bf91..288ceac1597d4 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -430,10 +430,10 @@ def hist_frame( raise ValueError( "hist method requires numerical or datetime columns, nothing to plot." ) - else: - data = data.reindex( - columns=[name for name in columns_copy if name in data.columns] - ) + + data = data.reindex( + columns=[name for name in columns_copy if name in data.columns], copy=False + ) fig, axes = create_subplots( naxes=naxes, From 2cfc9799270c8a17e42d6d7aafc10a84b030ca43 Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Mon, 14 Sep 2020 18:39:03 +0800 Subject: [PATCH 06/15] FIX: recover whatsnew note v1.2.0 recover info deleted by mistake --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d9f071cb058a4..f466a49986c34 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -117,6 +117,7 @@ Other enhancements - :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) - :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) +- `Styler` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`) - From 7569a319d6d30fc36a1e0156df095fdcbf6d9119 Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Mon, 14 Sep 2020 18:41:04 +0800 Subject: [PATCH 07/15] UPD: delete an extra line --- doc/source/whatsnew/v1.2.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f466a49986c34..54c6ff285ba3d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -121,7 +121,6 @@ Other enhancements - :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`) - - .. _whatsnew_120.api_breaking.python: Increased minimum version for Python From 35cccd649060b3428a9495c7485b2a93c283cdd0 Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Tue, 15 Sep 2020 20:57:54 +0800 Subject: [PATCH 08/15] UPD: explicit is better than implicit --- pandas/tests/plotting/test_hist_method.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index ab1c5e2519e79..c2c959541e612 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -232,10 +232,14 @@ def test_hist_non_numerical_or_datetime_raises(self): "a": np.random.rand(10), "b": np.random.rand(10), "c": to_datetime( - np.random.randint(1582800000000000000, 1583500000000000000, 10) + np.random.randint( + 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 + ) ), "d": to_datetime( - np.random.randint(1582800000000000000, 1583500000000000000, 10) + np.random.randint( + 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 + ) ), } ) From bd7f450391afbb1c4088a51591236f7af27ad48e Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Thu, 17 Sep 2020 23:55:59 +0800 Subject: [PATCH 09/15] UPD: hist support for type of datetime64 and datetimetz --- pandas/plotting/_matplotlib/hist.py | 2 +- pandas/tests/plotting/test_hist_method.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 288ceac1597d4..71f3538439b80 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -422,7 +422,7 @@ def hist_frame( # GH32590 columns_copy = data.columns numeric_data = data._get_numeric_data() - datetime_data = data.select_dtypes(include="datetime64[ns]") + datetime_data = data.select_dtypes(include=["datetime64", "datetimetz"]) data = concat([numeric_data, datetime_data], axis=1) naxes = len(data.columns) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index c2c959541e612..edd013d8e065a 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -239,7 +239,8 @@ def test_hist_non_numerical_or_datetime_raises(self): "d": to_datetime( np.random.randint( 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 - ) + ), + utc=True, ), } ) From 3a9925049ef0f0645d6fabb06d4ca55501d1cf7c Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Sun, 20 Sep 2020 22:06:57 +0800 Subject: [PATCH 10/15] UPD: modify and reformat related tests --- pandas/plotting/_matplotlib/hist.py | 13 +--- pandas/tests/plotting/common.py | 16 ++++- pandas/tests/plotting/test_hist_method.py | 76 +++++++++++++++-------- 3 files changed, 68 insertions(+), 37 deletions(-) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 71f3538439b80..6d22d2ffe4a51 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -6,8 +6,6 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass from pandas.core.dtypes.missing import isna, remove_na_arraylike -from pandas.core.reshape.concat import concat - from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.core import LinePlot, MPLPlot from pandas.plotting._matplotlib.tools import ( @@ -420,10 +418,9 @@ def hist_frame( column = [column] data = data[column] # GH32590 - columns_copy = data.columns - numeric_data = data._get_numeric_data() - datetime_data = data.select_dtypes(include=["datetime64", "datetimetz"]) - data = concat([numeric_data, datetime_data], axis=1) + data = data.select_dtypes( + include=(np.number, "datetime64", "datetimetz"), exclude="timedelta" + ) naxes = len(data.columns) if naxes == 0: @@ -431,10 +428,6 @@ def hist_frame( "hist method requires numerical or datetime columns, nothing to plot." ) - data = data.reindex( - columns=[name for name in columns_copy if name in data.columns], copy=False - ) - fig, axes = create_subplots( naxes=naxes, ax=ax, diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 9301a29933d45..e8ca598bc7c6d 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -10,7 +10,7 @@ from pandas.core.dtypes.api import is_list_like import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame, Series, to_datetime import pandas._testing as tm @@ -21,13 +21,16 @@ class TestPlotBase: """ def setup_method(self, method): - + # GH32590 import matplotlib as mpl from pandas.plotting._matplotlib import compat mpl.rcdefaults() + self.start_date_to_int64 = 812419200000000000 + self.end_date_to_int64 = 819331200000000000 + self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3() self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0() self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0() @@ -43,6 +46,7 @@ def setup_method(self, method): gender = np.random.choice(["Male", "Female"], size=n) classroom = np.random.choice(["A", "B", "C"], size=n) + # GH32590 self.hist_df = DataFrame( { "gender": gender, @@ -50,6 +54,14 @@ def setup_method(self, method): "height": random.normal(66, 4, size=n), "weight": random.normal(161, 32, size=n), "category": random.randint(4, size=n), + "datetime": to_datetime( + random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=n, + dtype=np.int64, + ) + ), } ) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index edd013d8e065a..0a0d357a09994 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -1,7 +1,7 @@ """ Test cases for .hist method """ -import numpy as np -from numpy.random import randn +from numpy import int64 +from numpy.random import choice, normal, rand, randint, randn import pytest import pandas.util._test_decorators as td @@ -48,7 +48,7 @@ def test_hist_legacy(self): @pytest.mark.slow def test_hist_bins_legacy(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(randn(10, 2)) ax = df.hist(bins=2)[0][0] assert len(ax.patches) == 2 @@ -135,7 +135,7 @@ def test_plot_fails_when_ax_differs_from_figure(self): def test_hist_with_legend(self, by, expected_axes_num, expected_layout): # GH 6279 - Series histogram can have a legend index = 15 * ["1"] + 15 * ["2"] - s = Series(np.random.randn(30), index=index, name="a") + s = Series(randn(30), index=index, name="a") s.index.name = "b" axes = _check_plot_works(s.hist, legend=True, by=by) @@ -146,7 +146,7 @@ def test_hist_with_legend(self, by, expected_axes_num, expected_layout): def test_hist_with_legend_raises(self, by): # GH 6279 - Series histogram with legend and label raises index = 15 * ["1"] + 15 * ["2"] - s = Series(np.random.randn(30), index=index, name="a") + s = Series(randn(30), index=index, name="a") s.index.name = "b" with pytest.raises(ValueError, match="Cannot use both legend and label"): @@ -157,23 +157,35 @@ def test_hist_with_legend_raises(self, by): class TestDataFramePlots(TestPlotBase): @pytest.mark.slow def test_hist_df_legacy(self): + # GH32590 from matplotlib.patches import Rectangle with tm.assert_produces_warning(UserWarning): _check_plot_works(self.hist_df.hist) # make sure layout is handled - df = DataFrame(randn(100, 3)) + df = DataFrame(randn(100, 2)) + df[2] = to_datetime( + randint( + self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64 + ) + ) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.hist, grid=False) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) assert not axes[1, 1].get_visible() + _check_plot_works(df[[2]].hist) df = DataFrame(randn(100, 1)) _check_plot_works(df.hist) # make sure layout is handled - df = DataFrame(randn(100, 6)) + df = DataFrame(randn(100, 5)) + df[5] = to_datetime( + randint( + self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64 + ) + ) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.hist, layout=(4, 2)) self._check_axes_shape(axes, axes_num=6, layout=(4, 2)) @@ -229,17 +241,13 @@ def test_hist_non_numerical_or_datetime_raises(self): # gh-10444, GH32590 df = DataFrame( { - "a": np.random.rand(10), - "b": np.random.rand(10), + "a": rand(10), + "b": randint(0, 10, 10), "c": to_datetime( - np.random.randint( - 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 - ) + randint(1582800000000000000, 1583500000000000000, 10, dtype=int64) ), "d": to_datetime( - np.random.randint( - 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 - ), + randint(1582800000000000000, 1583500000000000000, 10, dtype=int64), utc=True, ), } @@ -252,7 +260,13 @@ def test_hist_non_numerical_or_datetime_raises(self): @pytest.mark.slow def test_hist_layout(self): - df = DataFrame(randn(100, 3)) + # GH32590 + df = DataFrame(randn(100, 2)) + df[2] = to_datetime( + randint( + self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64 + ) + ) layout_to_expected_size = ( {"layout": None, "expected_size": (2, 2)}, # default is 2x2 @@ -282,9 +296,14 @@ def test_hist_layout(self): df.hist(layout=(-1, -1)) @pytest.mark.slow - # GH 9351 def test_tight_layout(self): - df = DataFrame(randn(100, 3)) + # GH 9351, GH32590 + df = DataFrame(randn(100, 2)) + df[2] = to_datetime( + randint( + self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64 + ) + ) _check_plot_works(df.hist) self.plt.tight_layout() @@ -343,7 +362,7 @@ def test_hist_with_legend(self, by, column): expected_labels = [expected_labels] * 2 index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame(randn(30, 2), index=index, columns=["a", "b"]) axes = _check_plot_works(df.hist, legend=True, by=by, column=column) self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout) @@ -357,7 +376,7 @@ def test_hist_with_legend(self, by, column): def test_hist_with_legend_raises(self, by, column): # GH 6279 - DataFrame histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame(randn(30, 2), index=index, columns=["a", "b"]) with pytest.raises(ValueError, match="Cannot use both legend and label"): df.hist(legend=True, by=by, column=column, label="d") @@ -367,12 +386,18 @@ def test_hist_with_legend_raises(self, by, column): class TestDataFrameGroupByPlots(TestPlotBase): @pytest.mark.slow def test_grouped_hist_legacy(self): + # GH 9351, GH32590 from matplotlib.patches import Rectangle from pandas.plotting._matplotlib.hist import _grouped_hist - df = DataFrame(randn(500, 2), columns=["A", "B"]) - df["C"] = np.random.randint(0, 4, 500) + df = DataFrame(randn(500, 1), columns=["A"]) + df["B"] = to_datetime( + randint( + self.start_date_to_int64, self.end_date_to_int64, size=500, dtype=int64 + ) + ) + df["C"] = randint(0, 4, 500) df["D"] = ["X"] * 500 axes = _grouped_hist(df.A, by=df.C) @@ -429,11 +454,12 @@ def test_grouped_hist_legacy(self): @pytest.mark.slow def test_grouped_hist_legacy2(self): + # GH32590 n = 10 - weight = Series(np.random.normal(166, 20, size=n)) - height = Series(np.random.normal(60, 10, size=n)) + weight = Series(normal(166, 20, size=n)) + height = Series(normal(60, 10, size=n)) with tm.RNGContext(42): - gender_int = np.random.choice([0, 1], size=n) + gender_int = choice([0, 1], size=n) df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int}) gb = df_int.groupby("gender") axes = gb.hist() From e9fa7ecf33e917fb9d54fa356f57b496b59e8d79 Mon Sep 17 00:00:00 2001 From: Honfung Wong Date: Sun, 4 Oct 2020 20:41:55 +0800 Subject: [PATCH 11/15] UPD: reformat code --- pandas/tests/plotting/common.py | 1 - pandas/tests/plotting/test_hist_method.py | 92 +++++++++++++---------- 2 files changed, 54 insertions(+), 39 deletions(-) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index e8ca598bc7c6d..f8c359e1cee55 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -21,7 +21,6 @@ class TestPlotBase: """ def setup_method(self, method): - # GH32590 import matplotlib as mpl from pandas.plotting._matplotlib import compat diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 0a0d357a09994..5eea1f0684e50 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -1,7 +1,7 @@ """ Test cases for .hist method """ -from numpy import int64 -from numpy.random import choice, normal, rand, randint, randn +import numpy as np +from numpy.random import randn import pytest import pandas.util._test_decorators as td @@ -48,7 +48,7 @@ def test_hist_legacy(self): @pytest.mark.slow def test_hist_bins_legacy(self): - df = DataFrame(randn(10, 2)) + df = DataFrame(np.random.randn(10, 2)) ax = df.hist(bins=2)[0][0] assert len(ax.patches) == 2 @@ -103,8 +103,8 @@ def test_hist_layout_with_by(self): def test_hist_no_overlap(self): from matplotlib.pyplot import gcf, subplot - x = Series(randn(2)) - y = Series(randn(2)) + x = Series(np.random.randn(2)) + y = Series(np.random.randn(2)) subplot(121) x.hist() subplot(122) @@ -135,7 +135,7 @@ def test_plot_fails_when_ax_differs_from_figure(self): def test_hist_with_legend(self, by, expected_axes_num, expected_layout): # GH 6279 - Series histogram can have a legend index = 15 * ["1"] + 15 * ["2"] - s = Series(randn(30), index=index, name="a") + s = Series(np.random.randn(30), index=index, name="a") s.index.name = "b" axes = _check_plot_works(s.hist, legend=True, by=by) @@ -146,7 +146,7 @@ def test_hist_with_legend(self, by, expected_axes_num, expected_layout): def test_hist_with_legend_raises(self, by): # GH 6279 - Series histogram with legend and label raises index = 15 * ["1"] + 15 * ["2"] - s = Series(randn(30), index=index, name="a") + s = Series(np.random.randn(30), index=index, name="a") s.index.name = "b" with pytest.raises(ValueError, match="Cannot use both legend and label"): @@ -157,17 +157,19 @@ def test_hist_with_legend_raises(self, by): class TestDataFramePlots(TestPlotBase): @pytest.mark.slow def test_hist_df_legacy(self): - # GH32590 from matplotlib.patches import Rectangle with tm.assert_produces_warning(UserWarning): _check_plot_works(self.hist_df.hist) # make sure layout is handled - df = DataFrame(randn(100, 2)) + df = DataFrame(np.random.randn(100, 2)) df[2] = to_datetime( - randint( - self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64 + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, ) ) with tm.assert_produces_warning(UserWarning): @@ -176,14 +178,17 @@ def test_hist_df_legacy(self): assert not axes[1, 1].get_visible() _check_plot_works(df[[2]].hist) - df = DataFrame(randn(100, 1)) + df = DataFrame(np.random.randn(100, 1)) _check_plot_works(df.hist) # make sure layout is handled - df = DataFrame(randn(100, 5)) + df = DataFrame(np.random.randn(100, 5)) df[5] = to_datetime( - randint( - self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64 + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, ) ) with tm.assert_produces_warning(UserWarning): @@ -241,13 +246,17 @@ def test_hist_non_numerical_or_datetime_raises(self): # gh-10444, GH32590 df = DataFrame( { - "a": rand(10), - "b": randint(0, 10, 10), + "a": np.random.rand(10), + "b": np.random.randint(0, 10, 10), "c": to_datetime( - randint(1582800000000000000, 1583500000000000000, 10, dtype=int64) + np.random.randint( + 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 + ) ), "d": to_datetime( - randint(1582800000000000000, 1583500000000000000, 10, dtype=int64), + np.random.randint( + 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 + ), utc=True, ), } @@ -260,11 +269,13 @@ def test_hist_non_numerical_or_datetime_raises(self): @pytest.mark.slow def test_hist_layout(self): - # GH32590 - df = DataFrame(randn(100, 2)) + df = DataFrame(np.random.randn(100, 2)) df[2] = to_datetime( - randint( - self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64 + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, ) ) @@ -297,11 +308,14 @@ def test_hist_layout(self): @pytest.mark.slow def test_tight_layout(self): - # GH 9351, GH32590 - df = DataFrame(randn(100, 2)) + # GH 9351 + df = DataFrame(np.random.randn(100, 2)) df[2] = to_datetime( - randint( - self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64 + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=100, + dtype=np.int64, ) ) _check_plot_works(df.hist) @@ -362,7 +376,7 @@ def test_hist_with_legend(self, by, column): expected_labels = [expected_labels] * 2 index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) axes = _check_plot_works(df.hist, legend=True, by=by, column=column) self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout) @@ -376,7 +390,7 @@ def test_hist_with_legend(self, by, column): def test_hist_with_legend_raises(self, by, column): # GH 6279 - DataFrame histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) with pytest.raises(ValueError, match="Cannot use both legend and label"): df.hist(legend=True, by=by, column=column, label="d") @@ -386,18 +400,21 @@ def test_hist_with_legend_raises(self, by, column): class TestDataFrameGroupByPlots(TestPlotBase): @pytest.mark.slow def test_grouped_hist_legacy(self): - # GH 9351, GH32590 + # GH 9351 from matplotlib.patches import Rectangle from pandas.plotting._matplotlib.hist import _grouped_hist - df = DataFrame(randn(500, 1), columns=["A"]) + df = DataFrame(np.random.randn(500, 1), columns=["A"]) df["B"] = to_datetime( - randint( - self.start_date_to_int64, self.end_date_to_int64, size=500, dtype=int64 + np.random.randint( + self.start_date_to_int64, + self.end_date_to_int64, + size=500, + dtype=np.int64, ) ) - df["C"] = randint(0, 4, 500) + df["C"] = np.random.randint(0, 4, 500) df["D"] = ["X"] * 500 axes = _grouped_hist(df.A, by=df.C) @@ -454,12 +471,11 @@ def test_grouped_hist_legacy(self): @pytest.mark.slow def test_grouped_hist_legacy2(self): - # GH32590 n = 10 - weight = Series(normal(166, 20, size=n)) - height = Series(normal(60, 10, size=n)) + weight = Series(np.random.normal(166, 20, size=n)) + height = Series(np.random.normal(60, 10, size=n)) with tm.RNGContext(42): - gender_int = choice([0, 1], size=n) + gender_int = np.random.choice([0, 1], size=n) df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int}) gb = df_int.groupby("gender") axes = gb.hist() From b24df4d56fdffc93757b29fd68b2605524720425 Mon Sep 17 00:00:00 2001 From: Honfung Wong Date: Sun, 4 Oct 2020 20:53:09 +0800 Subject: [PATCH 12/15] UPD: focus on main issues --- doc/source/whatsnew/v1.2.0.rst | 1 - pandas/tests/plotting/common.py | 1 - pandas/tests/plotting/test_hist_method.py | 16 ++++++++-------- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 498021898f115..d67e5c1a3fd40 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -165,7 +165,6 @@ Other enhancements - :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) - :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) -- `Styler` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`) - ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`) - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index f8c359e1cee55..87a41268c57a6 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -45,7 +45,6 @@ def setup_method(self, method): gender = np.random.choice(["Male", "Female"], size=n) classroom = np.random.choice(["A", "B", "C"], size=n) - # GH32590 self.hist_df = DataFrame( { "gender": gender, diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 5eea1f0684e50..e6421770fbf70 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -103,8 +103,8 @@ def test_hist_layout_with_by(self): def test_hist_no_overlap(self): from matplotlib.pyplot import gcf, subplot - x = Series(np.random.randn(2)) - y = Series(np.random.randn(2)) + x = Series(randn(2)) + y = Series(randn(2)) subplot(121) x.hist() subplot(122) @@ -163,7 +163,7 @@ def test_hist_df_legacy(self): _check_plot_works(self.hist_df.hist) # make sure layout is handled - df = DataFrame(np.random.randn(100, 2)) + df = DataFrame(randn(100, 2)) df[2] = to_datetime( np.random.randint( self.start_date_to_int64, @@ -178,11 +178,11 @@ def test_hist_df_legacy(self): assert not axes[1, 1].get_visible() _check_plot_works(df[[2]].hist) - df = DataFrame(np.random.randn(100, 1)) + df = DataFrame(randn(100, 1)) _check_plot_works(df.hist) # make sure layout is handled - df = DataFrame(np.random.randn(100, 5)) + df = DataFrame(randn(100, 5)) df[5] = to_datetime( np.random.randint( self.start_date_to_int64, @@ -268,8 +268,9 @@ def test_hist_non_numerical_or_datetime_raises(self): df_o.hist() @pytest.mark.slow + # GH 9351 def test_hist_layout(self): - df = DataFrame(np.random.randn(100, 2)) + df = DataFrame(randn(100, 2)) df[2] = to_datetime( np.random.randint( self.start_date_to_int64, @@ -400,12 +401,11 @@ def test_hist_with_legend_raises(self, by, column): class TestDataFrameGroupByPlots(TestPlotBase): @pytest.mark.slow def test_grouped_hist_legacy(self): - # GH 9351 from matplotlib.patches import Rectangle from pandas.plotting._matplotlib.hist import _grouped_hist - df = DataFrame(np.random.randn(500, 1), columns=["A"]) + df = DataFrame(randn(500, 1), columns=["A"]) df["B"] = to_datetime( np.random.randint( self.start_date_to_int64, From b02c017f9ecdc2578710e146f8f61b842406e263 Mon Sep 17 00:00:00 2001 From: Honfung Wong Date: Sun, 4 Oct 2020 20:56:05 +0800 Subject: [PATCH 13/15] UPD: recover # GH 9351 --- pandas/tests/plotting/test_hist_method.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index e6421770fbf70..d9a58e808661b 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -268,7 +268,6 @@ def test_hist_non_numerical_or_datetime_raises(self): df_o.hist() @pytest.mark.slow - # GH 9351 def test_hist_layout(self): df = DataFrame(randn(100, 2)) df[2] = to_datetime( @@ -308,8 +307,8 @@ def test_hist_layout(self): df.hist(layout=(-1, -1)) @pytest.mark.slow + # GH 9351 def test_tight_layout(self): - # GH 9351 df = DataFrame(np.random.randn(100, 2)) df[2] = to_datetime( np.random.randint( From 315d77e9d55f2178d5bbca9af7f5300e67f969ef Mon Sep 17 00:00:00 2001 From: Honfung Wong Date: Sun, 4 Oct 2020 20:57:47 +0800 Subject: [PATCH 14/15] UPD: recover a blank line --- pandas/tests/plotting/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 87a41268c57a6..288a7f14ac1d3 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -21,6 +21,7 @@ class TestPlotBase: """ def setup_method(self, method): + import matplotlib as mpl from pandas.plotting._matplotlib import compat From c8c74605b8fb1ffb3ca48b430d949b1dfde94159 Mon Sep 17 00:00:00 2001 From: Honfung Wong Date: Sun, 4 Oct 2020 20:58:58 +0800 Subject: [PATCH 15/15] UPD: reformat the blank line --- pandas/tests/plotting/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 288a7f14ac1d3..2a6bd97c93b8e 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -21,7 +21,7 @@ class TestPlotBase: """ def setup_method(self, method): - + import matplotlib as mpl from pandas.plotting._matplotlib import compat