Skip to content

Commit 2ebfae0

Browse files
changhiskhan authored and wesm committed
BUG: DatetimeIndex.to_period buggy with tz #2232
1 parent f7143cb commit 2ebfae0

File tree

6 files changed, with 95 additions and 18 deletions.

pandas/src/datetime.pxd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from numpy cimport int64_t, int32_t, npy_int64, npy_int32
1+
from numpy cimport int64_t, int32_t, npy_int64, npy_int32, ndarray
22
from cpython cimport PyObject
33

44
from cpython cimport PyUnicode_Check, PyUnicode_AsASCIIString
@@ -140,3 +140,6 @@ cdef inline int _cstring_to_dts(char *val, int length,
140140
NPY_UNSAFE_CASTING,
141141
dts, &islocal, &out_bestunit, &special)
142142
return result
143+
144+
cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
145+
int freq, object tz)

pandas/src/datetime.pyx

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,3 +1603,74 @@ cpdef normalize_date(object dt):
16031603
return datetime(dt.year, dt.month, dt.day)
16041604
else:
16051605
raise TypeError('Unrecognized type: %s' % type(dt))
1606+
1607+
1608+
cdef extern from "period.h":
    int64_t get_period_ordinal(int year, int month, int day,
                               int hour, int minute, int second,
                               int freq) except INT32_MIN


cdef inline int64_t _stamp_to_period_ordinal(int64_t stamp,
                                             int freq) except INT32_MIN:
    # Break one nanosecond-resolution timestamp into calendar fields and
    # return the period ordinal for frequency code ``freq``. Callers are
    # responsible for filtering NPY_NAT and for applying any UTC offset
    # before calling.
    cdef pandas_datetimestruct dts

    pandas_datetime_to_datetimestruct(stamp, PANDAS_FR_ns, &dts)
    return get_period_ordinal(dts.year, dts.month, dts.day,
                              dts.hour, dts.min, dts.sec, freq)


cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
                                                 int freq, object tz):
    """
    Convert nanosecond datetime64 values (as int64) to period ordinals,
    shifting each value by the UTC offset of ``tz`` first.

    Parameters
    ----------
    stamps : ndarray[int64_t]
        datetime64[ns] values viewed as int64.
        NOTE(review): presumably UTC-based, since the tz offset is *added*
        before the calendar fields are extracted -- confirm against callers.
    freq : int
        Frequency code handed to ``get_period_ordinal``.
    tz : object
        Time zone. UTC, the local zone (``_is_tzlocal``), zones without a
        ``_transition_info`` attribute (fixed offset) and zones with
        transitions are each handled by a dedicated branch.

    Returns
    -------
    ndarray[int64_t]
        One ordinal per input value; ``NPY_NAT`` inputs yield ``NPY_NAT``.

    Raises
    ------
    Exception
        If pytz is not available.
    """
    cdef:
        # Typed index and offset so the loops below need no Python objects
        # except where a datetime has to be built.
        Py_ssize_t i, n = len(stamps)
        ndarray[int64_t] result = np.empty(n, dtype=np.int64)
        ndarray[int64_t] trans, deltas, pos
        pandas_datetimestruct dts
        int64_t delta

    if not have_pytz:
        raise Exception('Could not find pytz module')

    if _is_utc(tz):
        # No offset to apply.
        for i in range(n):
            if stamps[i] == NPY_NAT:
                result[i] = NPY_NAT
                continue
            result[i] = _stamp_to_period_ordinal(stamps[i], freq)

    elif _is_tzlocal(tz):
        for i in range(n):
            if stamps[i] == NPY_NAT:
                result[i] = NPY_NAT
                continue
            # The offset of the local zone has to be asked of the tzinfo
            # object itself, so build a datetime from the unshifted fields.
            pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns,
                                              &dts)
            dt = datetime(dts.year, dts.month, dts.day, dts.hour,
                          dts.min, dts.sec, dts.us, tz)
            # seconds -> nanoseconds
            delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000
            result[i] = _stamp_to_period_ordinal(stamps[i] + delta, freq)
    else:
        # Adjust datetime64 timestamp, recompute datetimestruct
        trans = _get_transitions(tz)
        deltas = _get_deltas(tz)

        # statictzinfo
        if not hasattr(tz, '_transition_info'):
            # A single fixed offset applies to every value.
            for i in range(n):
                if stamps[i] == NPY_NAT:
                    result[i] = NPY_NAT
                    continue
                result[i] = _stamp_to_period_ordinal(stamps[i] + deltas[0],
                                                     freq)
        else:
            # Index of the transition in effect for each stamp; only this
            # branch reads it, so it is computed here rather than up front.
            _pos = trans.searchsorted(stamps, side='right') - 1
            if _pos.dtype != np.int64:
                _pos = _pos.astype(np.int64)
            pos = _pos

            for i in range(n):
                if stamps[i] == NPY_NAT:
                    result[i] = NPY_NAT
                    continue
                result[i] = _stamp_to_period_ordinal(
                    stamps[i] + deltas[pos[i]], freq)

    return result

pandas/src/plib.pyx

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# cython: profile=False
2-
32
cimport numpy as np
43
import numpy as np
54

@@ -24,7 +23,6 @@ import_array()
2423
# import datetime C API
2524
PyDateTime_IMPORT
2625

27-
2826
cdef extern from "period.h":
2927
ctypedef struct date_info:
3028
int64_t absdate
@@ -103,7 +101,7 @@ cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult):
103101

104102
return period_ord_w_mult * mult + 1;
105103

106-
def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq):
104+
def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None):
107105
"""
108106
Convert array of datetime64 values (passed in as 'i8' dtype) to a set of
109107
periods corresponding to desired frequency, per period convention.
@@ -117,10 +115,13 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq):
117115

118116
out = np.empty(l, dtype='i8')
119117

120-
for i in range(l):
121-
pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts)
122-
out[i] = get_period_ordinal(dts.year, dts.month, dts.day,
123-
dts.hour, dts.min, dts.sec, freq)
118+
if tz is None:
119+
for i in range(l):
120+
pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts)
121+
out[i] = get_period_ordinal(dts.year, dts.month, dts.day,
122+
dts.hour, dts.min, dts.sec, freq)
123+
else:
124+
out = localize_dt64arr_to_period(dtarr, freq, tz)
124125
return out
125126

126127
def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq):
@@ -353,4 +354,3 @@ cdef accessor _get_accessor_func(int code):
353354
return &pweekday
354355
else:
355356
raise ValueError('Unrecognized code: %s' % code)
356-

pandas/tseries/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,7 @@ def to_period(self, freq=None):
703703
if freq is None:
704704
freq = get_period_alias(self.freqstr)
705705

706-
return PeriodIndex(self.values, freq=freq)
706+
return PeriodIndex(self.values, freq=freq, tz=self.tz)
707707

708708
def order(self, return_indexer=False, ascending=True):
709709
"""

pandas/tseries/period.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -421,12 +421,12 @@ def _get_ordinals(data, freq):
421421
return lib.map_infer(data, f)
422422

423423

424-
def dt64arr_to_periodarr(data, freq, tz=None):
    """
    Convert a datetime64[ns] array to an array of period ordinals.

    Parameters
    ----------
    data : ndarray
        Must have dtype 'M8[ns]'.
    freq : object
        Frequency, resolved to a base frequency code through ``_gfc``.
    tz : object, default None
        Time zone forwarded to ``plib.dt64arr_to_periodarr``. Optional so
        that existing two-argument callers keep working.

    Returns
    -------
    The result of ``plib.dt64arr_to_periodarr`` for the int64 view of
    ``data``.

    Raises
    ------
    ValueError
        If ``data`` is not of dtype 'M8[ns]'.
    """
    if data.dtype != np.dtype('M8[ns]'):
        raise ValueError('Wrong dtype: %s' % data.dtype)

    # Only the base code is needed; the multiple is ignored here.
    base, _ = _gfc(freq)
    return plib.dt64arr_to_periodarr(data.view('i8'), base, tz)
430430

431431
# --- Period index sketch
432432
def _period_index_cmp(opname):
@@ -494,6 +494,8 @@ class PeriodIndex(Int64Index):
494494
hour : int or array, default None
495495
minute : int or array, default None
496496
second : int or array, default None
497+
tz : object, default None
498+
Timezone for converting datetime64 data to Periods
497499
498500
Examples
499501
--------
@@ -514,7 +516,8 @@ def __new__(cls, data=None, ordinal=None,
514516
freq=None, start=None, end=None, periods=None,
515517
copy=False, name=None,
516518
year=None, month=None, quarter=None, day=None,
517-
hour=None, minute=None, second=None):
519+
hour=None, minute=None, second=None,
520+
tz=None):
518521

519522
freq = _freq_mod.get_standard_freq(freq)
520523

@@ -531,9 +534,9 @@ def __new__(cls, data=None, ordinal=None,
531534
else:
532535
fields = [year, month, quarter, day, hour, minute, second]
533536
data, freq = cls._generate_range(start, end, periods,
534-
freq, fields)
537+
freq, fields)
535538
else:
536-
ordinal, freq = cls._from_arraylike(data, freq)
539+
ordinal, freq = cls._from_arraylike(data, freq, tz)
537540
data = np.array(ordinal, dtype=np.int64, copy=False)
538541

539542
subarr = data.view(cls)
@@ -562,7 +565,7 @@ def _generate_range(cls, start, end, periods, freq, fields):
562565
return subarr, freq
563566

564567
@classmethod
565-
def _from_arraylike(cls, data, freq):
568+
def _from_arraylike(cls, data, freq, tz):
566569
if not isinstance(data, np.ndarray):
567570
if np.isscalar(data) or isinstance(data, Period):
568571
raise ValueError('PeriodIndex() must be called with a '
@@ -608,7 +611,7 @@ def _from_arraylike(cls, data, freq):
608611
'inferred from first element'))
609612

610613
if np.issubdtype(data.dtype, np.datetime64):
611-
data = dt64arr_to_periodarr(data, freq)
614+
data = dt64arr_to_periodarr(data, freq, tz)
612615
elif data.dtype == np.int64:
613616
pass
614617
else:
@@ -1219,4 +1222,3 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None):
12191222
"""
12201223
return PeriodIndex(start=start, end=end, periods=periods,
12211224
freq=freq, name=name)
1222-

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
563563
plib_depends = [srcpath(f, suffix='.pyx')
564564
for f in plib_depends]
565565
plib_depends.append('pandas/src/util.pxd')
566+
plib_depends.append('pandas/src/datetime.pxd')
566567
else:
567568
tseries_depends = []
568569
plib_depends = []

0 commit comments

Comments
 (0)