Skip to content

Commit 03709d4

Browse files
authored
ENH: support of pandas.DataFrame.hist for datetime data (pandas-dev#36287)
1 parent 1b9641c commit 03709d4

File tree

4 files changed

+87
-13
lines changed

4 files changed

+87
-13
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ Other enhancements
185185
- :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`)
186186
- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
187187
- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
188+
- :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`)
188189
- ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
189190
- :meth:`Rolling.mean()` and :meth:`Rolling.sum()` now use Kahan summation to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
190191
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)

pandas/plotting/_matplotlib/hist.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -417,11 +417,16 @@ def hist_frame(
417417
if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
418418
column = [column]
419419
data = data[column]
420-
data = data._get_numeric_data()
420+
# GH32590
421+
data = data.select_dtypes(
422+
include=(np.number, "datetime64", "datetimetz"), exclude="timedelta"
423+
)
421424
naxes = len(data.columns)
422425

423426
if naxes == 0:
424-
raise ValueError("hist method requires numerical columns, nothing to plot.")
427+
raise ValueError(
428+
"hist method requires numerical or datetime columns, nothing to plot."
429+
)
425430

426431
fig, axes = create_subplots(
427432
naxes=naxes,

pandas/tests/plotting/common.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas.core.dtypes.api import is_list_like
1111

1212
import pandas as pd
13-
from pandas import DataFrame, Series
13+
from pandas import DataFrame, Series, to_datetime
1414
import pandas._testing as tm
1515

1616

@@ -28,6 +28,9 @@ def setup_method(self, method):
2828

2929
mpl.rcdefaults()
3030

31+
self.start_date_to_int64 = 812419200000000000
32+
self.end_date_to_int64 = 819331200000000000
33+
3134
self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3()
3235
self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0()
3336
self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0()
@@ -50,6 +53,14 @@ def setup_method(self, method):
5053
"height": random.normal(66, 4, size=n),
5154
"weight": random.normal(161, 32, size=n),
5255
"category": random.randint(4, size=n),
56+
"datetime": to_datetime(
57+
random.randint(
58+
self.start_date_to_int64,
59+
self.end_date_to_int64,
60+
size=n,
61+
dtype=np.int64,
62+
)
63+
),
5364
}
5465
)
5566

pandas/tests/plotting/test_hist_method.py

+67-10
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import pandas.util._test_decorators as td
88

9-
from pandas import DataFrame, Index, Series
9+
from pandas import DataFrame, Index, Series, to_datetime
1010
import pandas._testing as tm
1111
from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
1212

@@ -163,17 +163,34 @@ def test_hist_df_legacy(self):
163163
_check_plot_works(self.hist_df.hist)
164164

165165
# make sure layout is handled
166-
df = DataFrame(randn(100, 3))
166+
df = DataFrame(randn(100, 2))
167+
df[2] = to_datetime(
168+
np.random.randint(
169+
self.start_date_to_int64,
170+
self.end_date_to_int64,
171+
size=100,
172+
dtype=np.int64,
173+
)
174+
)
167175
with tm.assert_produces_warning(UserWarning):
168176
axes = _check_plot_works(df.hist, grid=False)
169177
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
170178
assert not axes[1, 1].get_visible()
171179

180+
_check_plot_works(df[[2]].hist)
172181
df = DataFrame(randn(100, 1))
173182
_check_plot_works(df.hist)
174183

175184
# make sure layout is handled
176-
df = DataFrame(randn(100, 6))
185+
df = DataFrame(randn(100, 5))
186+
df[5] = to_datetime(
187+
np.random.randint(
188+
self.start_date_to_int64,
189+
self.end_date_to_int64,
190+
size=100,
191+
dtype=np.int64,
192+
)
193+
)
177194
with tm.assert_produces_warning(UserWarning):
178195
axes = _check_plot_works(df.hist, layout=(4, 2))
179196
self._check_axes_shape(axes, axes_num=6, layout=(4, 2))
@@ -225,18 +242,42 @@ def test_hist_df_legacy(self):
225242
ser.hist(foo="bar")
226243

227244
@pytest.mark.slow
228-
def test_hist_non_numerical_raises(self):
229-
# gh-10444
230-
df = DataFrame(np.random.rand(10, 2))
245+
def test_hist_non_numerical_or_datetime_raises(self):
246+
# gh-10444, GH32590
247+
df = DataFrame(
248+
{
249+
"a": np.random.rand(10),
250+
"b": np.random.randint(0, 10, 10),
251+
"c": to_datetime(
252+
np.random.randint(
253+
1582800000000000000, 1583500000000000000, 10, dtype=np.int64
254+
)
255+
),
256+
"d": to_datetime(
257+
np.random.randint(
258+
1582800000000000000, 1583500000000000000, 10, dtype=np.int64
259+
),
260+
utc=True,
261+
),
262+
}
263+
)
231264
df_o = df.astype(object)
232265

233-
msg = "hist method requires numerical columns, nothing to plot."
266+
msg = "hist method requires numerical or datetime columns, nothing to plot."
234267
with pytest.raises(ValueError, match=msg):
235268
df_o.hist()
236269

237270
@pytest.mark.slow
238271
def test_hist_layout(self):
239-
df = DataFrame(randn(100, 3))
272+
df = DataFrame(randn(100, 2))
273+
df[2] = to_datetime(
274+
np.random.randint(
275+
self.start_date_to_int64,
276+
self.end_date_to_int64,
277+
size=100,
278+
dtype=np.int64,
279+
)
280+
)
240281

241282
layout_to_expected_size = (
242283
{"layout": None, "expected_size": (2, 2)}, # default is 2x2
@@ -268,7 +309,15 @@ def test_hist_layout(self):
268309
@pytest.mark.slow
269310
# GH 9351
270311
def test_tight_layout(self):
271-
df = DataFrame(randn(100, 3))
312+
df = DataFrame(np.random.randn(100, 2))
313+
df[2] = to_datetime(
314+
np.random.randint(
315+
self.start_date_to_int64,
316+
self.end_date_to_int64,
317+
size=100,
318+
dtype=np.int64,
319+
)
320+
)
272321
_check_plot_works(df.hist)
273322
self.plt.tight_layout()
274323

@@ -355,7 +404,15 @@ def test_grouped_hist_legacy(self):
355404

356405
from pandas.plotting._matplotlib.hist import _grouped_hist
357406

358-
df = DataFrame(randn(500, 2), columns=["A", "B"])
407+
df = DataFrame(randn(500, 1), columns=["A"])
408+
df["B"] = to_datetime(
409+
np.random.randint(
410+
self.start_date_to_int64,
411+
self.end_date_to_int64,
412+
size=500,
413+
dtype=np.int64,
414+
)
415+
)
359416
df["C"] = np.random.randint(0, 4, 500)
360417
df["D"] = ["X"] * 500
361418

0 commit comments

Comments
 (0)