diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index dfb7a3675fdd5..abfa57dc09334 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -161,6 +161,7 @@ Datetimelike API Changes - For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with non-``None`` ``freq`` attribute, addition or subtraction of integer-dtyped array or ``Index`` will return an object of the same class (:issue:`19959`) - :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`) - :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) +- :func:`cut` and :func:`qcut` now return :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) .. _whatsnew_0240.api.other: diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 8bbf939e110e9..863ebc6354136 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -11,6 +11,7 @@ is_datetime64_dtype, is_timedelta64_dtype, is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, _ensure_int64) import pandas.core.algorithms as algos @@ -18,7 +19,7 @@ from pandas._libs.lib import infer_dtype from pandas import (to_timedelta, to_datetime, Categorical, Timestamp, Timedelta, - Series, Interval, IntervalIndex) + Series, Index, Interval, IntervalIndex) import numpy as np @@ -364,6 +365,8 @@ def _bins_to_cuts(x, bins, right=True, labels=None, result = result.astype(np.float64) np.putmask(result, na_mask, np.nan) + bins = _convert_bin_to_datelike_type(bins, dtype) + return result, bins @@ -428,6 +431,26 @@ def _convert_bin_to_numeric_type(bins, dtype): return bins +def _convert_bin_to_datelike_type(bins, dtype): + """ + Convert bins to a DatetimeIndex or 
TimedeltaIndex if the original dtype is + datelike + + Parameters + ---------- + bins : list-like of bins + dtype : dtype of data + + Returns + ------- + bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is + datelike + """ + if is_datetime64tz_dtype(dtype) or is_datetime_or_timedelta_dtype(dtype): + bins = Index(bins.astype(np.int64), dtype=dtype) + return bins + + def _format_labels(bins, precision, right=True, include_lowest=False, dtype=None): """ based on the dtype, return our labels """ diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 807fb2530603a..44de3e93d42bf 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -7,7 +7,8 @@ import pandas as pd from pandas import (DataFrame, Series, isna, to_datetime, DatetimeIndex, Index, Timestamp, Interval, IntervalIndex, Categorical, - cut, qcut, date_range, NaT, TimedeltaIndex) + cut, qcut, date_range, timedelta_range, NaT, + TimedeltaIndex) from pandas.tseries.offsets import Nano, Day import pandas.util.testing as tm from pandas.api.types import CategoricalDtype as CDT @@ -605,3 +606,38 @@ def f(): mask = result.isna() tm.assert_numpy_array_equal( mask, np.array([False, True, True, True, True])) + + @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Pacific']) + def test_datetime_cut_roundtrip(self, tz): + # GH 19891 + s = Series(date_range('20180101', periods=3, tz=tz)) + result, result_bins = cut(s, 2, retbins=True) + expected = cut(s, result_bins) + tm.assert_series_equal(result, expected) + expected_bins = DatetimeIndex(['2017-12-31 23:57:07.200000', + '2018-01-02 00:00:00', + '2018-01-03 00:00:00']) + expected_bins = expected_bins.tz_localize(tz) + tm.assert_index_equal(result_bins, expected_bins) + + def test_timedelta_cut_roundtrip(self): + # GH 19891 + s = Series(timedelta_range('1day', periods=3)) + result, result_bins = cut(s, 2, retbins=True) + expected = cut(s, result_bins) + tm.assert_series_equal(result, 
expected) + expected_bins = TimedeltaIndex(['0 days 23:57:07.200000', + '2 days 00:00:00', + '3 days 00:00:00']) + tm.assert_index_equal(result_bins, expected_bins) + + @pytest.mark.parametrize('arg, expected_bins', [ + [timedelta_range('1day', periods=3), + TimedeltaIndex(['1 days', '2 days', '3 days'])], + [date_range('20180101', periods=3), + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'])]]) + def test_datelike_qcut_bins(self, arg, expected_bins): + # GH 19891 + s = Series(arg) + result, result_bins = qcut(s, 2, retbins=True) + tm.assert_index_equal(result_bins, expected_bins)