Skip to content

Commit f180969

Browse files
committed
BUG: Retain timezone dtype with cut and qcut
Add additional test
1 parent e97be6f commit f180969

File tree

3 files changed

+50
-4
lines changed

3 files changed

+50
-4
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -906,6 +906,7 @@ Reshaping
906906
- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`)
907907
- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`)
908908
- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`)
909+
- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)
909910

910911
Other
911912
^^^^^

pandas/core/reshape/tile.py

+14-3
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
is_categorical_dtype,
1010
is_datetime64_dtype,
1111
is_timedelta64_dtype,
12+
is_datetime64tz_dtype,
1213
_ensure_int64)
1314

1415
import pandas.core.algorithms as algos
@@ -284,7 +285,9 @@ def _coerce_to_type(x):
284285
"""
285286
dtype = None
286287

287-
if is_timedelta64_dtype(x):
288+
if is_datetime64tz_dtype(x):
289+
dtype = x.dtype
290+
elif is_timedelta64_dtype(x):
288291
x = to_timedelta(x)
289292
dtype = np.timedelta64
290293
elif is_datetime64_dtype(x):
@@ -333,7 +336,11 @@ def _format_labels(bins, precision, right=True,
333336

334337
closed = 'right' if right else 'left'
335338

336-
if is_datetime64_dtype(dtype):
339+
if is_datetime64tz_dtype(dtype):
340+
from functools import partial
341+
formatter = partial(Timestamp, tz=dtype.tz)
342+
adjust = lambda x: x - Timedelta('1ns')
343+
elif is_datetime64_dtype(dtype):
337344
formatter = Timestamp
338345
adjust = lambda x: x - Timedelta('1ns')
339346
elif is_timedelta64_dtype(dtype):
@@ -372,7 +379,11 @@ def _preprocess_for_cut(x):
372379
series_index = x.index
373380
name = x.name
374381

375-
x = np.asarray(x)
382+
ndim = getattr(x, 'ndim', None)
383+
if ndim is None:
384+
x = np.asarray(x)
385+
if x.ndim != 1:
386+
raise ValueError("Input array must be 1 dimensional")
376387

377388
return x_is_series, series_index, name, x
378389

pandas/tests/reshape/test_tile.py

+35-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
from pandas.compat import zip
66

7-
from pandas import (Series, isna, to_datetime, DatetimeIndex,
7+
from pandas import (DataFrame, Series, isna, to_datetime, DatetimeIndex,
88
Timestamp, Interval, IntervalIndex, Categorical,
99
cut, qcut, date_range, NaT, TimedeltaIndex)
1010
from pandas.tseries.offsets import Nano, Day
@@ -104,6 +104,12 @@ def test_cut_corner(self):
104104

105105
pytest.raises(ValueError, cut, [1, 2, 3], 0.5)
106106

107+
@pytest.mark.parametrize('arg', [2, np.eye(2), DataFrame(np.eye(2))])
108+
@pytest.mark.parametrize('cut_func', [cut, qcut])
109+
def test_cut_not_1d_arg(self, arg, cut_func):
110+
with pytest.raises(ValueError):
111+
cut_func(arg, 2)
112+
107113
def test_cut_out_of_range_more(self):
108114
# #1511
109115
s = Series([0, -1, 0, 1, -3], name='x')
@@ -488,6 +494,34 @@ def test_datetime_cut(self):
488494
result, bins = cut(data, 3, retbins=True)
489495
tm.assert_series_equal(Series(result), expected)
490496

497+
def test_datetimetz_cut(self):
498+
# GH 19872
499+
tz = 'US/Eastern'
500+
s = Series(date_range('20130101', periods=3, tz=tz))
501+
result = cut(s, 3)
502+
expected = (
503+
Series(IntervalIndex([
504+
Interval(Timestamp('2012-12-31 23:57:07.200000', tz=tz),
505+
Timestamp('2013-01-01 16:00:00', tz=tz)),
506+
Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
507+
Timestamp('2013-01-02 08:00:00', tz=tz)),
508+
Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
509+
Timestamp('2013-01-03 00:00:00', tz=tz))]))
510+
.astype(CDT(ordered=True)))
511+
tm.assert_series_equal(result, expected)
512+
513+
result = qcut(s, 3)
514+
expected = (
515+
Series(IntervalIndex([
516+
Interval(Timestamp('2012-12-31 23:59:59.999999999', tz=tz),
517+
Timestamp('2013-01-01 16:00:00', tz=tz)),
518+
Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
519+
Timestamp('2013-01-02 08:00:00', tz=tz)),
520+
Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
521+
Timestamp('2013-01-03 00:00:00', tz=tz))]))
522+
.astype(CDT(ordered=True)))
523+
tm.assert_series_equal(result, expected)
524+
491525
def test_datetime_bin(self):
492526
data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
493527
bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']

0 commit comments

Comments
 (0)