Skip to content

Commit 8768876

Browse files
jschendel authored and jreback committed
BUG: Fix qcut with NaT present (#19833)
1 parent ca27ee9 commit 8768876

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -901,6 +901,7 @@ Reshaping
901901
- Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`)
902902
- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`)
903903
- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`)
904+
- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`)
904905

905906
Other
906907
^^^^^

pandas/core/reshape/tile.py

+7 -3
Original file line number | Diff line number | Diff line change
@@ -279,18 +279,22 @@ def _trim_zeros(x):
279279
def _coerce_to_type(x):
280280
"""
281281
if the passed data is of datetime/timedelta type,
282-
this method converts it to integer so that cut method can
282+
this method converts it to numeric so that cut method can
283283
handle it
284284
"""
285285
dtype = None
286286

287287
if is_timedelta64_dtype(x):
288-
x = to_timedelta(x).view(np.int64)
288+
x = to_timedelta(x)
289289
dtype = np.timedelta64
290290
elif is_datetime64_dtype(x):
291-
x = to_datetime(x).view(np.int64)
291+
x = to_datetime(x)
292292
dtype = np.datetime64
293293

294+
if dtype is not None:
295+
# GH 19768: force NaT to NaN during integer conversion
296+
x = np.where(x.notna(), x.view(np.int64), np.nan)
297+
294298
return x, dtype
295299

296300

pandas/tests/reshape/test_tile.py

+14 -1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,8 @@
66

77
from pandas import (Series, isna, to_datetime, DatetimeIndex,
88
Timestamp, Interval, IntervalIndex, Categorical,
9-
cut, qcut, date_range)
9+
cut, qcut, date_range, NaT, TimedeltaIndex)
10+
from pandas.tseries.offsets import Nano, Day
1011
import pandas.util.testing as tm
1112
from pandas.api.types import CategoricalDtype as CDT
1213

@@ -250,6 +251,18 @@ def test_qcut_nas(self):
250251
result = qcut(arr, 4)
251252
assert isna(result[:20]).all()
252253

254+
@pytest.mark.parametrize('s', [
255+
Series(DatetimeIndex(['20180101', NaT, '20180103'])),
256+
Series(TimedeltaIndex(['0 days', NaT, '2 days']))],
257+
ids=lambda x: str(x.dtype))
258+
def test_qcut_nat(self, s):
259+
# GH 19768
260+
intervals = IntervalIndex.from_tuples(
261+
[(s[0] - Nano(), s[2] - Day()), np.nan, (s[2] - Day(), s[2])])
262+
expected = Series(Categorical(intervals, ordered=True))
263+
result = qcut(s, 2)
264+
tm.assert_series_equal(result, expected)
265+
253266
def test_qcut_index(self):
254267
result = qcut([0, 2], 2)
255268
intervals = [Interval(-0.001, 1), Interval(1, 2)]

0 commit comments

Comments
 (0)