Skip to content

Commit deca80b

Browse files
committed
ENH: convert UTC -> local, attach right tzinfo #1156
1 parent 808547e commit deca80b

File tree

7 files changed

+161
-189
lines changed

7 files changed

+161
-189
lines changed

pandas/core/series.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -2560,10 +2560,7 @@ def tz_convert(self, tz, copy=True):
25602560
Returns
25612561
-------
25622562
"""
2563-
if self.index.tz is None:
2564-
new_index = self.index.tz_localize(tz)
2565-
else:
2566-
new_index = self.index.tz_normalize(tz)
2563+
new_index = self.index.tz_convert(tz)
25672564

25682565
new_values = self.values
25692566
if copy:

pandas/src/datetime.pxd

+3
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,9 @@ cdef extern from "numpy/ndarrayobject.h":
6969
NPY_DATETIMEUNIT fr,
7070
npy_datetimestruct *result)
7171

72+
cdef extern from "numpy_helper.h":
73+
npy_datetime unbox_datetime64_scalar(object o)
74+
7275
cdef extern from "numpy/npy_common.h":
7376

7477
ctypedef unsigned char npy_bool

pandas/src/datetime.pyx

+121 -111
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,8 @@ class Timestamp(_Timestamp):
4747
__slots__ = ['value', 'offset']
4848

4949
def __new__(cls, object ts_input, object offset=None, tz=None):
50+
cdef _TSObject ts
51+
5052
if isinstance(ts_input, float):
5153
# to do, do we want to support this, ie with fractional seconds?
5254
raise TypeError("Cannot convert a float to datetime")
@@ -61,15 +63,8 @@ class Timestamp(_Timestamp):
6163

6264
# make datetime happy
6365
ts_base = _Timestamp.__new__(
64-
cls,
65-
ts.dtval.year,
66-
ts.dtval.month,
67-
ts.dtval.day,
68-
ts.dtval.hour,
69-
ts.dtval.minute,
70-
ts.dtval.second,
71-
ts.dtval.microsecond,
72-
ts.dtval.tzinfo)
66+
cls, ts.dts.year, ts.dts.month, ts.dts.day,
67+
ts.dts.hour, ts.dts.min, ts.dts.sec, ts.dts.us, ts.tzinfo)
7368

7469
# fill out rest of data
7570
ts_base.value = ts.value
@@ -103,7 +98,7 @@ class Timestamp(_Timestamp):
10398
"""
10499
from pandas.tseries.period import Period
105100

106-
if freq == None:
101+
if freq is None:
107102
freq = self.freq
108103

109104
return Period(self, freq=freq)
@@ -216,19 +211,16 @@ cdef class _Timestamp(datetime):
216211
# lightweight C object to hold datetime & int64 pair
217212
cdef class _TSObject:
218213
cdef:
219-
datetime dtval # python datetime
220-
int64_t value # numpy dt64
221-
222-
property dtval:
223-
def __get__(self):
224-
return self.dtval
214+
npy_datetimestruct dts # npy_datetimestruct
215+
int64_t value # numpy dt64
216+
object tzinfo
225217

226218
property value:
227219
def __get__(self):
228220
return self.value
229221

230222
# helper to extract datetime and int64 from several different possibilities
231-
cpdef convert_to_tsobject(object ts, object tzinfo=None):
223+
cpdef convert_to_tsobject(object ts, object tz=None):
232224
"""
233225
Extract datetime and int64 from any of:
234226
- np.int64
@@ -239,96 +231,107 @@ cpdef convert_to_tsobject(object ts, object tzinfo=None):
239231
- another timestamp object
240232
"""
241233
cdef:
242-
npy_datetimestruct dts
243-
npy_bool islocal, special
244-
NPY_DATETIMEUNIT out_bestunit
245-
char* buf
246-
_Timestamp tmp
247-
_TSObject retval
248-
249-
if isinstance(ts, _TSObject) or ts is None:
250-
return ts
234+
_TSObject obj
251235

252-
retval = _TSObject()
236+
obj = _TSObject()
253237

254-
# pretty expensive - faster way to access as i8?
255238
if is_datetime64_object(ts):
256-
retval.value = ts.view('i8')
257-
PyArray_DatetimeToDatetimeStruct(retval.value, NPY_FR_us, &dts)
258-
retval.dtval = <object>PyDateTime_FromDateAndTime(
259-
dts.year, dts.month,
260-
dts.day, dts.hour,
261-
dts.min, dts.sec, dts.us)
262-
# this is cheap
239+
obj.value = unbox_datetime64_scalar(ts)
240+
PyArray_DatetimeToDatetimeStruct(obj.value, NPY_FR_us, &obj.dts)
263241
elif is_integer_object(ts):
264-
retval.value = ts
265-
PyArray_DatetimeToDatetimeStruct(retval.value, NPY_FR_us, &dts)
266-
retval.dtval = <object>PyDateTime_FromDateAndTime(
267-
dts.year, dts.month,
268-
dts.day, dts.hour,
269-
dts.min, dts.sec, dts.us)
270-
# this is pretty cheap
242+
obj.value = ts
243+
PyArray_DatetimeToDatetimeStruct(ts, NPY_FR_us, &obj.dts)
271244
elif util.is_string_object(ts):
272-
if PyUnicode_Check(ts):
273-
ts = PyUnicode_AsASCIIString(ts);
274-
parse_iso_8601_datetime(ts, len(ts), NPY_FR_us, NPY_UNSAFE_CASTING,
275-
&dts, &islocal, &out_bestunit, &special)
276-
retval.value = PyArray_DatetimeStructToDatetime(NPY_FR_us, &dts)
277-
retval.dtval = <object>PyDateTime_FromDateAndTime(
278-
dts.year, dts.month,
279-
dts.day, dts.hour,
280-
dts.min, dts.sec, dts.us)
281-
# pretty cheap
245+
_string_to_dts(ts, &obj.dts)
246+
obj.value = PyArray_DatetimeStructToDatetime(NPY_FR_us, &obj.dts)
282247
elif PyDateTime_Check(ts):
283-
retval.dtval = ts
284-
# to do this is expensive (10x other constructors)
285-
# convert_pydatetime_to_datetimestruct(<PyObject *>ts, &dts,
286-
# &out_bestunit, 0)
287-
dts.year = PyDateTime_GET_YEAR(ts)
288-
dts.month = PyDateTime_GET_MONTH(ts)
289-
dts.day = PyDateTime_GET_DAY(ts)
290-
dts.hour = PyDateTime_DATE_GET_HOUR(ts)
291-
dts.min = PyDateTime_DATE_GET_MINUTE(ts)
292-
dts.sec = PyDateTime_DATE_GET_SECOND(ts)
293-
dts.us = PyDateTime_DATE_GET_MICROSECOND(ts)
294-
retval.value = PyArray_DatetimeStructToDatetime(NPY_FR_us, &dts)
248+
obj.value = _pydatetime_to_dts(ts, &obj.dts)
249+
if tz is None:
250+
tz = ts.tzinfo
295251
elif PyDate_Check(ts):
296-
dts.year = PyDateTime_GET_YEAR(ts)
297-
dts.month = PyDateTime_GET_MONTH(ts)
298-
dts.day = PyDateTime_GET_DAY(ts)
299-
retval.dtval = PyDateTime_FromDateAndTime(dts.year, dts.month, dts.day,
300-
0, 0, 0, 0)
301-
dts.hour = 0
302-
dts.min = 0
303-
dts.sec = 0
304-
dts.us = 0
305-
retval.value = PyArray_DatetimeStructToDatetime(NPY_FR_us, &dts)
306-
# pretty cheap
307-
elif isinstance(ts, _Timestamp):
308-
tmp = ts
309-
retval.value = tmp.value
310-
retval.dtval = tmp
311-
# fallback, does it at least have the right fields?
312-
elif isinstance(ts, object):
313-
dts.year = ts.year
314-
dts.month = ts.month
315-
dts.day = ts.day
316-
dts.hour = ts.hour
317-
dts.min = ts.minute
318-
dts.sec = ts.second
319-
dts.us = ts.microsecond
320-
retval.dtval = <object>PyDateTime_FromDateAndTime(
321-
dts.year, dts.month,
322-
dts.day, dts.hour,
323-
dts.min, dts.sec, dts.us)
324-
retval.value = PyArray_DatetimeStructToDatetime(NPY_FR_us, &dts)
252+
obj.value = _date_to_datetime64(ts, &obj.dts)
325253
else:
326-
raise ValueError("Could not construct Timestamp from argument %s" % type(ts))
254+
raise ValueError("Could not construct Timestamp from argument %s" %
255+
type(ts))
327256

328-
if tzinfo is not None:
329-
retval.dtval = retval.dtval.replace(tzinfo=tzinfo)
257+
if tz is not None:
258+
if tz is pytz.utc:
259+
obj.tzinfo = tz
260+
else:
261+
# Adjust datetime64 timestamp, recompute datetimestruct
262+
trans = _get_transitions(tz)
263+
deltas = _get_deltas(tz)
264+
pos = trans.searchsorted(obj.value) - 1
265+
inf = tz._transition_info[pos]
266+
267+
obj.value = obj.value + deltas[pos]
268+
PyArray_DatetimeToDatetimeStruct(obj.value, NPY_FR_us, &obj.dts)
269+
obj.tzinfo = tz._tzinfos[inf]
270+
271+
return obj
272+
273+
# elif isinstance(ts, _Timestamp):
274+
# tmp = ts
275+
# obj.value = (<_Timestamp> ts).value
276+
# obj.dtval =
277+
# elif isinstance(ts, object):
278+
# # If all else fails
279+
# obj.value = _dtlike_to_datetime64(ts, &obj.dts)
280+
# obj.dtval = _dts_to_pydatetime(&obj.dts)
281+
282+
cdef inline object _datetime64_to_datetime(int64_t val):
283+
cdef npy_datetimestruct dts
284+
PyArray_DatetimeToDatetimeStruct(val, NPY_FR_us, &dts)
285+
return _dts_to_pydatetime(&dts)
286+
287+
cdef inline object _dts_to_pydatetime(npy_datetimestruct *dts):
288+
return <object> PyDateTime_FromDateAndTime(dts.year, dts.month,
289+
dts.day, dts.hour,
290+
dts.min, dts.sec, dts.us)
291+
292+
cdef inline int64_t _pydatetime_to_dts(object val, npy_datetimestruct *dts):
293+
dts.year = PyDateTime_GET_YEAR(val)
294+
dts.month = PyDateTime_GET_MONTH(val)
295+
dts.day = PyDateTime_GET_DAY(val)
296+
dts.hour = PyDateTime_DATE_GET_HOUR(val)
297+
dts.min = PyDateTime_DATE_GET_MINUTE(val)
298+
dts.sec = PyDateTime_DATE_GET_SECOND(val)
299+
dts.us = PyDateTime_DATE_GET_MICROSECOND(val)
300+
return PyArray_DatetimeStructToDatetime(NPY_FR_us, dts)
301+
302+
cdef inline int64_t _dtlike_to_datetime64(object val,
303+
npy_datetimestruct *dts):
304+
dts.year = val.year
305+
dts.month = val.month
306+
dts.day = val.day
307+
dts.hour = val.hour
308+
dts.min = val.minute
309+
dts.sec = val.second
310+
dts.us = val.microsecond
311+
return PyArray_DatetimeStructToDatetime(NPY_FR_us, dts)
312+
313+
cdef inline int64_t _date_to_datetime64(object val,
314+
npy_datetimestruct *dts):
315+
dts.year = PyDateTime_GET_YEAR(val)
316+
dts.month = PyDateTime_GET_MONTH(val)
317+
dts.day = PyDateTime_GET_DAY(val)
318+
dts.hour = 0
319+
dts.min = 0
320+
dts.sec = 0
321+
dts.us = 0
322+
return PyArray_DatetimeStructToDatetime(NPY_FR_us, dts)
330323

331-
return retval
324+
325+
cdef inline int _string_to_dts(object val, npy_datetimestruct* dts) except -1:
326+
cdef:
327+
npy_bool islocal, special
328+
NPY_DATETIMEUNIT out_bestunit
329+
330+
if PyUnicode_Check(val):
331+
val = PyUnicode_AsASCIIString(val);
332+
parse_iso_8601_datetime(val, len(val), NPY_FR_us, NPY_UNSAFE_CASTING,
333+
dts, &islocal, &out_bestunit, &special)
334+
return 0
332335

333336
cdef conversion_factor(time_res res1, time_res res2):
334337
cdef:
@@ -454,10 +457,10 @@ cdef class YearOffset(_Offset):
454457
cdef _TSObject ts = self.ts
455458

456459
self.t = ts.value + self.dayoffset * us_in_day
457-
self.y = ts.dtval.year
460+
self.y = ts.dts.year
458461

459-
self.ly = (ts.dtval.month > 2 or
460-
ts.dtval.month == 2 and ts.dtval.day == 29)
462+
self.ly = (ts.dts.month > 2 or
463+
ts.dts.month == 2 and ts.dts.day == 29)
461464

462465
if self.biz != 0:
463466
self.dow = (ts_dayofweek(ts) + self.dayoffset) % 7
@@ -518,8 +521,8 @@ cdef class MonthOffset(_Offset):
518521
self.t = ts.value + (self.dayoffset * us_in_day)
519522

520523
# for day counting
521-
self.m = ts.dtval.month - 1
522-
self.y = ts.dtval.year
524+
self.m = ts.dts.month - 1
525+
self.y = ts.dts.year
523526
self.ly = is_leapyear(self.y)
524527

525528
if self.biz != 0:
@@ -592,12 +595,12 @@ cdef class DayOfMonthOffset(_Offset):
592595
cdef _TSObject ts = self.ts
593596

594597
# rewind to beginning of month
595-
self.t = ts.value - (ts.dtval.day - 1) * us_in_day
596-
self.dow = dayofweek(ts.dtval.year, ts.dtval.month, 1)
598+
self.t = ts.value - (ts.dts.day - 1) * us_in_day
599+
self.dow = dayofweek(ts.dts.year, ts.dts.month, 1)
597600

598601
# for day counting
599-
self.m = ts.dtval.month - 1
600-
self.y = ts.dtval.year
602+
self.m = ts.dts.month - 1
603+
self.y = ts.dts.year
601604
self.ly = is_leapyear(self.y)
602605

603606
cpdef next(self):
@@ -758,8 +761,9 @@ def string_to_datetime(ndarray[object] strings, raise_=False, dayfirst=False):
758761
return oresult
759762

760763

764+
#----------------------------------------------------------------------
761765
# Conversion routines
762-
# ------------------------------------------------------------------------------
766+
763767

764768
def pydt_to_i8(object pydt):
765769
'''
@@ -779,8 +783,8 @@ def i8_to_pydt(int64_t i8, object tzinfo = None):
779783
'''
780784
return Timestamp(i8)
781785

786+
#----------------------------------------------------------------------
782787
# time zone conversion helpers
783-
# ------------------------------------------------------------------------------
784788

785789
try:
786790
import pytz
@@ -803,7 +807,10 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
803807
utc_dates = np.empty(n, dtype=np.int64)
804808
deltas = _get_deltas(tz1)
805809
trans = _get_transitions(tz1)
806-
pos = trans.searchsorted(vals[0])
810+
pos = trans.searchsorted(vals[0]) - 1
811+
if pos < 0:
812+
raise ValueError('First time before start of DST info')
813+
807814
offset = deltas[pos]
808815
for i in range(n):
809816
v = vals[i]
@@ -822,7 +829,10 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
822829
result = np.empty(n, dtype=np.int64)
823830
trans = _get_transitions(tz2)
824831
deltas = _get_deltas(tz2)
825-
pos = trans.searchsorted(utc_dates[0])
832+
pos = trans.searchsorted(utc_dates[0]) - 1
833+
if pos < 0:
834+
raise ValueError('First time before start of DST info')
835+
826836
offset = deltas[pos]
827837
for i in range(n):
828838
v = utc_dates[i]
@@ -1148,7 +1158,7 @@ def monthrange(int64_t year, int64_t month):
11481158
return (dayofweek(year, month, 1), days)
11491159

11501160
cdef inline int64_t ts_dayofweek(_TSObject ts):
1151-
return dayofweek(ts.dtval.year, ts.dtval.month, ts.dtval.day)
1161+
return dayofweek(ts.dts.year, ts.dts.month, ts.dts.day)
11521162

11531163
# Period logic
11541164
#----------------------------------------------------------------------

pandas/src/numpy_helper.h

+7
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
#include "Python.h"
22
#include "numpy/ndarrayobject.h"
3+
#include "numpy/arrayscalars.h"
34

45
#ifndef PANDAS_INLINE
56
#if defined(__GNUC__)
@@ -47,6 +48,12 @@ get_nat() {
4748
return NPY_MIN_INT64;
4849
}
4950

51+
PANDAS_INLINE npy_datetime
52+
unbox_datetime64_scalar(PyObject* obj) {
53+
return ((PyDatetimeScalarObject*) obj)->obval;
54+
55+
}
56+
5057
PANDAS_INLINE int
5158
is_integer_object(PyObject* obj) {
5259
return (!PyBool_Check(obj)) && PyArray_IsIntegerScalar(obj);

0 commit comments

Comments
 (0)