Skip to content

ENH: support of pandas.DataFrame.hist for datetime data #36287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ Other enhancements
- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
- ``Styler`` now allows CSS class names to be added directly to individual data cells (:issue:`36159`)
- :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`)
- :meth:`Rolling.mean()` and :meth:`Rolling.sum()` now use Kahan summation to calculate the mean and the sum, respectively, avoiding numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)

Expand Down
9 changes: 7 additions & 2 deletions pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,11 +417,16 @@ def hist_frame(
if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
column = [column]
data = data[column]
data = data._get_numeric_data()
# GH32590
data = data.select_dtypes(
include=(np.number, "datetime64", "datetimetz"), exclude="timedelta"
)
naxes = len(data.columns)

if naxes == 0:
raise ValueError("hist method requires numerical columns, nothing to plot.")
raise ValueError(
"hist method requires numerical or datetime columns, nothing to plot."
)

fig, axes = create_subplots(
naxes=naxes,
Expand Down
16 changes: 14 additions & 2 deletions pandas/tests/plotting/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas.core.dtypes.api import is_list_like

import pandas as pd
from pandas import DataFrame, Series
from pandas import DataFrame, Series, to_datetime
import pandas._testing as tm


Expand All @@ -21,13 +21,16 @@ class TestPlotBase:
"""

def setup_method(self, method):

# GH32590
import matplotlib as mpl

from pandas.plotting._matplotlib import compat

mpl.rcdefaults()

self.start_date_to_int64 = 812419200000000000
self.end_date_to_int64 = 819331200000000000

self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3()
self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0()
self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0()
Expand All @@ -43,13 +46,22 @@ def setup_method(self, method):
gender = np.random.choice(["Male", "Female"], size=n)
classroom = np.random.choice(["A", "B", "C"], size=n)

# GH32590
self.hist_df = DataFrame(
{
"gender": gender,
"classroom": classroom,
"height": random.normal(66, 4, size=n),
"weight": random.normal(161, 32, size=n),
"category": random.randint(4, size=n),
"datetime": to_datetime(
random.randint(
self.start_date_to_int64,
self.end_date_to_int64,
size=n,
dtype=np.int64,
)
),
}
)

Expand Down
86 changes: 64 additions & 22 deletions pandas/tests/plotting/test_hist_method.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
""" Test cases for .hist method """

import numpy as np
from numpy.random import randn
from numpy import int64
from numpy.random import choice, normal, rand, randint, randn
import pytest

import pandas.util._test_decorators as td

from pandas import DataFrame, Index, Series
from pandas import DataFrame, Index, Series, to_datetime
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase, _check_plot_works

Expand Down Expand Up @@ -48,7 +48,7 @@ def test_hist_legacy(self):

@pytest.mark.slow
def test_hist_bins_legacy(self):
df = DataFrame(np.random.randn(10, 2))
df = DataFrame(randn(10, 2))
ax = df.hist(bins=2)[0][0]
assert len(ax.patches) == 2

Expand Down Expand Up @@ -135,7 +135,7 @@ def test_plot_fails_when_ax_differs_from_figure(self):
def test_hist_with_legend(self, by, expected_axes_num, expected_layout):
# GH 6279 - Series histogram can have a legend
index = 15 * ["1"] + 15 * ["2"]
s = Series(np.random.randn(30), index=index, name="a")
s = Series(randn(30), index=index, name="a")
s.index.name = "b"

axes = _check_plot_works(s.hist, legend=True, by=by)
Expand All @@ -146,7 +146,7 @@ def test_hist_with_legend(self, by, expected_axes_num, expected_layout):
def test_hist_with_legend_raises(self, by):
# GH 6279 - Series histogram with legend and label raises
index = 15 * ["1"] + 15 * ["2"]
s = Series(np.random.randn(30), index=index, name="a")
s = Series(randn(30), index=index, name="a")
s.index.name = "b"

with pytest.raises(ValueError, match="Cannot use both legend and label"):
Expand All @@ -157,23 +157,35 @@ def test_hist_with_legend_raises(self, by):
class TestDataFramePlots(TestPlotBase):
@pytest.mark.slow
def test_hist_df_legacy(self):
# GH32590
from matplotlib.patches import Rectangle

with tm.assert_produces_warning(UserWarning):
_check_plot_works(self.hist_df.hist)

# make sure layout is handled
df = DataFrame(randn(100, 3))
df = DataFrame(randn(100, 2))
df[2] = to_datetime(
randint(
self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64
)
)
with tm.assert_produces_warning(UserWarning):
axes = _check_plot_works(df.hist, grid=False)
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
assert not axes[1, 1].get_visible()

_check_plot_works(df[[2]].hist)
df = DataFrame(randn(100, 1))
_check_plot_works(df.hist)

# make sure layout is handled
df = DataFrame(randn(100, 6))
df = DataFrame(randn(100, 5))
df[5] = to_datetime(
randint(
self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64
)
)
with tm.assert_produces_warning(UserWarning):
axes = _check_plot_works(df.hist, layout=(4, 2))
self._check_axes_shape(axes, axes_num=6, layout=(4, 2))
Expand Down Expand Up @@ -225,18 +237,36 @@ def test_hist_df_legacy(self):
ser.hist(foo="bar")

@pytest.mark.slow
def test_hist_non_numerical_raises(self):
# gh-10444
df = DataFrame(np.random.rand(10, 2))
def test_hist_non_numerical_or_datetime_raises(self):
# gh-10444, GH32590
df = DataFrame(
{
"a": rand(10),
"b": randint(0, 10, 10),
"c": to_datetime(
randint(1582800000000000000, 1583500000000000000, 10, dtype=int64)
),
"d": to_datetime(
randint(1582800000000000000, 1583500000000000000, 10, dtype=int64),
utc=True,
),
}
)
df_o = df.astype(object)

msg = "hist method requires numerical columns, nothing to plot."
msg = "hist method requires numerical or datetime columns, nothing to plot."
with pytest.raises(ValueError, match=msg):
df_o.hist()

@pytest.mark.slow
def test_hist_layout(self):
df = DataFrame(randn(100, 3))
# GH32590
df = DataFrame(randn(100, 2))
df[2] = to_datetime(
randint(
self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64
)
)

layout_to_expected_size = (
{"layout": None, "expected_size": (2, 2)}, # default is 2x2
Expand Down Expand Up @@ -266,9 +296,14 @@ def test_hist_layout(self):
df.hist(layout=(-1, -1))

@pytest.mark.slow
# GH 9351
def test_tight_layout(self):
df = DataFrame(randn(100, 3))
# GH 9351, GH32590
df = DataFrame(randn(100, 2))
df[2] = to_datetime(
randint(
self.start_date_to_int64, self.end_date_to_int64, size=100, dtype=int64
)
)
_check_plot_works(df.hist)
self.plt.tight_layout()

Expand Down Expand Up @@ -327,7 +362,7 @@ def test_hist_with_legend(self, by, column):
expected_labels = [expected_labels] * 2

index = Index(15 * ["1"] + 15 * ["2"], name="c")
df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
df = DataFrame(randn(30, 2), index=index, columns=["a", "b"])

axes = _check_plot_works(df.hist, legend=True, by=by, column=column)
self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
Expand All @@ -341,7 +376,7 @@ def test_hist_with_legend(self, by, column):
def test_hist_with_legend_raises(self, by, column):
# GH 6279 - DataFrame histogram with legend and label raises
index = Index(15 * ["1"] + 15 * ["2"], name="c")
df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
df = DataFrame(randn(30, 2), index=index, columns=["a", "b"])

with pytest.raises(ValueError, match="Cannot use both legend and label"):
df.hist(legend=True, by=by, column=column, label="d")
Expand All @@ -351,12 +386,18 @@ def test_hist_with_legend_raises(self, by, column):
class TestDataFrameGroupByPlots(TestPlotBase):
@pytest.mark.slow
def test_grouped_hist_legacy(self):
# GH 9351, GH32590
from matplotlib.patches import Rectangle

from pandas.plotting._matplotlib.hist import _grouped_hist

df = DataFrame(randn(500, 2), columns=["A", "B"])
df["C"] = np.random.randint(0, 4, 500)
df = DataFrame(randn(500, 1), columns=["A"])
df["B"] = to_datetime(
randint(
self.start_date_to_int64, self.end_date_to_int64, size=500, dtype=int64
)
)
df["C"] = randint(0, 4, 500)
df["D"] = ["X"] * 500

axes = _grouped_hist(df.A, by=df.C)
Expand Down Expand Up @@ -413,11 +454,12 @@ def test_grouped_hist_legacy(self):

@pytest.mark.slow
def test_grouped_hist_legacy2(self):
# GH32590
n = 10
weight = Series(np.random.normal(166, 20, size=n))
height = Series(np.random.normal(60, 10, size=n))
weight = Series(normal(166, 20, size=n))
height = Series(normal(60, 10, size=n))
with tm.RNGContext(42):
gender_int = np.random.choice([0, 1], size=n)
gender_int = choice([0, 1], size=n)
df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int})
gb = df_int.groupby("gender")
axes = gb.hist()
Expand Down