Skip to content

Commit 79bc335

Browse files
mroeschke authored and jreback committed
ENH: Return DatetimeIndex or TimedeltaIndex bins for q/cut when input is datelike (#20956)
1 parent 7b63769 commit 79bc335

File tree

3 files changed

+62
-2
lines changed

3 files changed

+62
-2
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ Datetimelike API Changes
178178
- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with non-``None`` ``freq`` attribute, addition or subtraction of integer-dtyped array or ``Index`` will return an object of the same class (:issue:`19959`)
179179
- :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`)
180180
- :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`)
181+
- :func:`cut` and :func:`qcut` now return :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins, respectively, when the input has datetime or timedelta dtype and ``retbins=True`` (:issue:`19891`)
181182

182183
.. _whatsnew_0240.api.other:
183184

pandas/core/reshape/tile.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@
1111
is_datetime64_dtype,
1212
is_timedelta64_dtype,
1313
is_datetime64tz_dtype,
14+
is_datetime_or_timedelta_dtype,
1415
_ensure_int64)
1516

1617
import pandas.core.algorithms as algos
1718
import pandas.core.nanops as nanops
1819
from pandas._libs.lib import infer_dtype
1920
from pandas import (to_timedelta, to_datetime,
2021
Categorical, Timestamp, Timedelta,
21-
Series, Interval, IntervalIndex)
22+
Series, Index, Interval, IntervalIndex)
2223

2324
import numpy as np
2425

@@ -364,6 +365,8 @@ def _bins_to_cuts(x, bins, right=True, labels=None,
364365
result = result.astype(np.float64)
365366
np.putmask(result, na_mask, np.nan)
366367

368+
bins = _convert_bin_to_datelike_type(bins, dtype)
369+
367370
return result, bins
368371

369372

@@ -428,6 +431,26 @@ def _convert_bin_to_numeric_type(bins, dtype):
428431
return bins
429432

430433

434+
def _convert_bin_to_datelike_type(bins, dtype):
435+
"""
436+
Convert bins to a DatetimeIndex or TimedeltaIndex if the original dtype is
437+
datelike
438+
439+
Parameters
440+
----------
441+
bins : list-like of bins
442+
dtype : dtype of data
443+
444+
Returns
445+
-------
446+
bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is
447+
datelike
448+
"""
449+
if is_datetime64tz_dtype(dtype) or is_datetime_or_timedelta_dtype(dtype):
450+
bins = Index(bins.astype(np.int64), dtype=dtype)
451+
return bins
452+
453+
431454
def _format_labels(bins, precision, right=True,
432455
include_lowest=False, dtype=None):
433456
""" based on the dtype, return our labels """

pandas/tests/reshape/test_tile.py

+37-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import pandas as pd
88
from pandas import (DataFrame, Series, isna, to_datetime, DatetimeIndex, Index,
99
Timestamp, Interval, IntervalIndex, Categorical,
10-
cut, qcut, date_range, NaT, TimedeltaIndex)
10+
cut, qcut, date_range, timedelta_range, NaT,
11+
TimedeltaIndex)
1112
from pandas.tseries.offsets import Nano, Day
1213
import pandas.util.testing as tm
1314
from pandas.api.types import CategoricalDtype as CDT
@@ -605,3 +606,38 @@ def f():
605606
mask = result.isna()
606607
tm.assert_numpy_array_equal(
607608
mask, np.array([False, True, True, True, True]))
609+
610+
@pytest.mark.parametrize('tz', [None, 'UTC', 'US/Pacific'])
611+
def test_datetime_cut_roundtrip(self, tz):
612+
# GH 19891
613+
s = Series(date_range('20180101', periods=3, tz=tz))
614+
result, result_bins = cut(s, 2, retbins=True)
615+
expected = cut(s, result_bins)
616+
tm.assert_series_equal(result, expected)
617+
expected_bins = DatetimeIndex(['2017-12-31 23:57:07.200000',
618+
'2018-01-02 00:00:00',
619+
'2018-01-03 00:00:00'])
620+
expected_bins = expected_bins.tz_localize(tz)
621+
tm.assert_index_equal(result_bins, expected_bins)
622+
623+
def test_timedelta_cut_roundtrip(self):
624+
# GH 19891
625+
s = Series(timedelta_range('1day', periods=3))
626+
result, result_bins = cut(s, 2, retbins=True)
627+
expected = cut(s, result_bins)
628+
tm.assert_series_equal(result, expected)
629+
expected_bins = TimedeltaIndex(['0 days 23:57:07.200000',
630+
'2 days 00:00:00',
631+
'3 days 00:00:00'])
632+
tm.assert_index_equal(result_bins, expected_bins)
633+
634+
@pytest.mark.parametrize('arg, expected_bins', [
635+
[timedelta_range('1day', periods=3),
636+
TimedeltaIndex(['1 days', '2 days', '3 days'])],
637+
[date_range('20180101', periods=3),
638+
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'])]])
639+
def test_datelike_qcut_bins(self, arg, expected_bins):
640+
# GH 19891
641+
s = Series(arg)
642+
result, result_bins = qcut(s, 2, retbins=True)
643+
tm.assert_index_equal(result_bins, expected_bins)

0 commit comments

Comments
 (0)