Skip to content

Commit 2a9cbc0

Browse files
committed
BUG: Retain timezone dtype with cut and qcut
Add additional test. Adjust test and move import to top. Add additional test. Boolean mask support for numpy 1.9.
1 parent 63ce781 commit 2a9cbc0

File tree

3 files changed

+97
-34
lines changed

3 files changed

+97
-34
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,7 @@ Reshaping
10181018
- Bug in :func:`DataFrame.iterrows`, which would infer strings not compliant with `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ as datetimes (:issue:`19671`)
10191019
- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`)
10201020
- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)
1021+
- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)
10211022

10221023
Other
10231024
^^^^^

pandas/core/reshape/tile.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Quantilization functions and related stuff
33
"""
4+
from functools import partial
45

56
from pandas.core.dtypes.missing import isna
67
from pandas.core.dtypes.common import (
@@ -9,6 +10,7 @@
910
is_categorical_dtype,
1011
is_datetime64_dtype,
1112
is_timedelta64_dtype,
13+
is_datetime64tz_dtype,
1214
_ensure_int64)
1315

1416
import pandas.core.algorithms as algos
@@ -239,7 +241,8 @@ def _bins_to_cuts(x, bins, right=True, labels=None,
239241
ids = _ensure_int64(bins.searchsorted(x, side=side))
240242

241243
if include_lowest:
242-
ids[x == bins[0]] = 1
244+
# Numpy 1.9 support: ensure this mask is a Numpy array
245+
ids[np.asarray(x == bins[0])] = 1
243246

244247
na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
245248
has_nas = na_mask.any()
@@ -284,7 +287,9 @@ def _coerce_to_type(x):
284287
"""
285288
dtype = None
286289

287-
if is_timedelta64_dtype(x):
290+
if is_datetime64tz_dtype(x):
291+
dtype = x.dtype
292+
elif is_timedelta64_dtype(x):
288293
x = to_timedelta(x)
289294
dtype = np.timedelta64
290295
elif is_datetime64_dtype(x):
@@ -305,7 +310,7 @@ def _convert_bin_to_numeric_type(bins, dtype):
305310
306311
Parameters
307312
----------
308-
bins : list-liek of bins
313+
bins : list-like of bins
309314
dtype : dtype of data
310315
311316
Raises
@@ -333,7 +338,10 @@ def _format_labels(bins, precision, right=True,
333338

334339
closed = 'right' if right else 'left'
335340

336-
if is_datetime64_dtype(dtype):
341+
if is_datetime64tz_dtype(dtype):
342+
formatter = partial(Timestamp, tz=dtype.tz)
343+
adjust = lambda x: x - Timedelta('1ns')
344+
elif is_datetime64_dtype(dtype):
337345
formatter = Timestamp
338346
adjust = lambda x: x - Timedelta('1ns')
339347
elif is_timedelta64_dtype(dtype):
@@ -372,7 +380,11 @@ def _preprocess_for_cut(x):
372380
series_index = x.index
373381
name = x.name
374382

375-
x = np.asarray(x)
383+
ndim = getattr(x, 'ndim', None)
384+
if ndim is None:
385+
x = np.asarray(x)
386+
if x.ndim != 1:
387+
raise ValueError("Input array must be 1 dimensional")
376388

377389
return x_is_series, series_index, name, x
378390

pandas/tests/reshape/test_tile.py

+79-29
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
from pandas.compat import zip
66

7-
from pandas import (Series, isna, to_datetime, DatetimeIndex,
7+
from pandas import (DataFrame, Series, isna, to_datetime, DatetimeIndex, Index,
88
Timestamp, Interval, IntervalIndex, Categorical,
99
cut, qcut, date_range, NaT, TimedeltaIndex)
1010
from pandas.tseries.offsets import Nano, Day
@@ -104,6 +104,12 @@ def test_cut_corner(self):
104104

105105
pytest.raises(ValueError, cut, [1, 2, 3], 0.5)
106106

107+
@pytest.mark.parametrize('arg', [2, np.eye(2), DataFrame(np.eye(2))])
108+
@pytest.mark.parametrize('cut_func', [cut, qcut])
109+
def test_cut_not_1d_arg(self, arg, cut_func):
110+
with pytest.raises(ValueError):
111+
cut_func(arg, 2)
112+
107113
def test_cut_out_of_range_more(self):
108114
# #1511
109115
s = Series([0, -1, 0, 1, -3], name='x')
@@ -251,18 +257,6 @@ def test_qcut_nas(self):
251257
result = qcut(arr, 4)
252258
assert isna(result[:20]).all()
253259

254-
@pytest.mark.parametrize('s', [
255-
Series(DatetimeIndex(['20180101', NaT, '20180103'])),
256-
Series(TimedeltaIndex(['0 days', NaT, '2 days']))],
257-
ids=lambda x: str(x.dtype))
258-
def test_qcut_nat(self, s):
259-
# GH 19768
260-
intervals = IntervalIndex.from_tuples(
261-
[(s[0] - Nano(), s[2] - Day()), np.nan, (s[2] - Day(), s[2])])
262-
expected = Series(Categorical(intervals, ordered=True))
263-
result = qcut(s, 2)
264-
tm.assert_series_equal(result, expected)
265-
266260
def test_qcut_index(self):
267261
result = qcut([0, 2], 2)
268262
intervals = [Interval(-0.001, 1), Interval(1, 2)]
@@ -452,6 +446,37 @@ def test_single_bin(self):
452446
result = cut(s, 1, labels=False)
453447
tm.assert_series_equal(result, expected)
454448

449+
@pytest.mark.parametrize(
450+
"array_1_writeable, array_2_writeable",
451+
[(True, True), (True, False), (False, False)])
452+
def test_cut_read_only(self, array_1_writeable, array_2_writeable):
453+
# issue 18773
454+
array_1 = np.arange(0, 100, 10)
455+
array_1.flags.writeable = array_1_writeable
456+
457+
array_2 = np.arange(0, 100, 10)
458+
array_2.flags.writeable = array_2_writeable
459+
460+
hundred_elements = np.arange(100)
461+
462+
tm.assert_categorical_equal(cut(hundred_elements, array_1),
463+
cut(hundred_elements, array_2))
464+
465+
466+
class TestDatelike(object):
467+
468+
@pytest.mark.parametrize('s', [
469+
Series(DatetimeIndex(['20180101', NaT, '20180103'])),
470+
Series(TimedeltaIndex(['0 days', NaT, '2 days']))],
471+
ids=lambda x: str(x.dtype))
472+
def test_qcut_nat(self, s):
473+
# GH 19768
474+
intervals = IntervalIndex.from_tuples(
475+
[(s[0] - Nano(), s[2] - Day()), np.nan, (s[2] - Day(), s[2])])
476+
expected = Series(Categorical(intervals, ordered=True))
477+
result = qcut(s, 2)
478+
tm.assert_series_equal(result, expected)
479+
455480
def test_datetime_cut(self):
456481
# GH 14714
457482
# testing for time data to be present as series
@@ -488,6 +513,47 @@ def test_datetime_cut(self):
488513
result, bins = cut(data, 3, retbins=True)
489514
tm.assert_series_equal(Series(result), expected)
490515

516+
@pytest.mark.parametrize('bins', [
517+
3, [Timestamp('2013-01-01 04:57:07.200000').value,
518+
Timestamp('2013-01-01 21:00:00').value,
519+
Timestamp('2013-01-02 13:00:00').value,
520+
Timestamp('2013-01-03 05:00:00').value]])
521+
@pytest.mark.parametrize('const', [list, np.array, Index, Series])
522+
def test_datetimetz_cut(self, bins, const):
523+
# GH 19872
524+
tz = 'US/Eastern'
525+
s = Series(date_range('20130101', periods=3, tz=tz))
526+
if not isinstance(bins, int):
527+
bins = const(bins)
528+
result = cut(s, bins)
529+
expected = (
530+
Series(IntervalIndex([
531+
Interval(Timestamp('2012-12-31 23:57:07.200000', tz=tz),
532+
Timestamp('2013-01-01 16:00:00', tz=tz)),
533+
Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
534+
Timestamp('2013-01-02 08:00:00', tz=tz)),
535+
Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
536+
Timestamp('2013-01-03 00:00:00', tz=tz))]))
537+
.astype(CDT(ordered=True)))
538+
tm.assert_series_equal(result, expected)
539+
540+
@pytest.mark.parametrize('bins', [3, np.linspace(0, 1, 4)])
541+
def test_datetimetz_qcut(self, bins):
542+
# GH 19872
543+
tz = 'US/Eastern'
544+
s = Series(date_range('20130101', periods=3, tz=tz))
545+
result = qcut(s, bins)
546+
expected = (
547+
Series(IntervalIndex([
548+
Interval(Timestamp('2012-12-31 23:59:59.999999999', tz=tz),
549+
Timestamp('2013-01-01 16:00:00', tz=tz)),
550+
Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
551+
Timestamp('2013-01-02 08:00:00', tz=tz)),
552+
Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
553+
Timestamp('2013-01-03 00:00:00', tz=tz))]))
554+
.astype(CDT(ordered=True)))
555+
tm.assert_series_equal(result, expected)
556+
491557
def test_datetime_bin(self):
492558
data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
493559
bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
@@ -523,19 +589,3 @@ def f():
523589
mask = result.isna()
524590
tm.assert_numpy_array_equal(
525591
mask, np.array([False, True, True, True, True]))
526-
527-
@pytest.mark.parametrize(
528-
"array_1_writeable, array_2_writeable",
529-
[(True, True), (True, False), (False, False)])
530-
def test_cut_read_only(self, array_1_writeable, array_2_writeable):
531-
# issue 18773
532-
array_1 = np.arange(0, 100, 10)
533-
array_1.flags.writeable = array_1_writeable
534-
535-
array_2 = np.arange(0, 100, 10)
536-
array_2.flags.writeable = array_2_writeable
537-
538-
hundred_elements = np.arange(100)
539-
540-
tm.assert_categorical_equal(cut(hundred_elements, array_1),
541-
cut(hundred_elements, array_2))

0 commit comments

Comments
 (0)