Skip to content

Commit 4cf0250

Browse files
committed
ENH: conversion to UTC between differently indexed datetime indexes, close #864
1 parent 5f11316 commit 4cf0250

File tree

7 files changed

+291
-91
lines changed

7 files changed

+291
-91
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ pandas 0.8.0
3131
conversion method (#1018)
3232
- Implement robust frequency inference function and `inferred_freq` attribute
3333
on DatetimeIndex (#391)
34+
- Convert DatetimeIndexes to UTC if time zones are different in join/setops
35+
(#864)
3436

3537
**Improvements to existing features**
3638

pandas/core/series.py

+25
Original file line numberDiff line numberDiff line change
@@ -2540,6 +2540,31 @@ def _repr_footer(self):
25402540
namestr = "Name: %s, " % str(self.name) if self.name else ""
25412541
return '%s%sLength: %d' % (freqstr, namestr, len(self))
25422542

2543+
def tz_convert(self, tz, copy=True):
    """
    Convert TimeSeries to target time zone. If it is time zone naive, it
    will be localized to the passed time zone.

    Parameters
    ----------
    tz : string or pytz.timezone object
    copy : boolean, default True
        Also make a copy of the underlying data

    Returns
    -------
    converted : TimeSeries
        New Series carrying the same name and values, indexed by the
        localized (naive index) or converted (tz-aware index) index
    """
    index = self.index

    # A naive index has no source zone to convert from, so it is
    # localized to `tz` instead of being converted
    if index.tz is None:
        new_index = index.tz_localize(tz)
    else:
        new_index = index.tz_normalize(tz)

    # Only the values are optionally copied; the index is always new
    new_values = self.values.copy() if copy else self.values

    return Series(new_values, index=new_index, name=self.name)
2567+
25432568
def to_timestamp(self, freq='D', how='start', copy=True):
25442569
"""
25452570
Cast to datetimeindex of timestamps, at *beginning* of period

pandas/src/datetime.pyx

+76-80
Original file line numberDiff line numberDiff line change
@@ -765,79 +765,84 @@ except:
765765
trans_cache = {}
766766
utc_offset_cache = {}
767767

768-
cdef ndarray[int64_t] _get_transitions(object tz):
768+
def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
    """
    Convert int64 timestamps from time zone tz1 to time zone tz2 via UTC

    Parameters
    ----------
    vals : ndarray[int64_t]
        Timestamps expressed in tz1, in the units of the cached transition
        tables (microseconds). The transition table is scanned forward
        only, so the values are assumed to be in ascending order.
    tz1 : pytz time zone the values are currently expressed in
    tz2 : pytz time zone to convert to

    Returns
    -------
    converted : ndarray[int64_t]
        For non-empty input, `vals` itself is returned (not a copy) when
        both zones are UTC
    """
    cdef:
        ndarray[int64_t] utc_dates, result, trans, deltas
        Py_ssize_t i, pos, ntrans, n = len(vals)
        int64_t v, offset

    if not have_pytz:
        import pytz

    # vals[0] is read below to find the starting transition
    if n == 0:
        return np.empty(0, dtype=np.int64)

    # Convert to UTC

    if tz1.zone != 'UTC':
        utc_dates = np.empty(n, dtype=np.int64)
        deltas = _get_deltas(tz1)
        trans = _get_transitions(tz1)
        ntrans = len(trans)
        # Index of the last transition at or before the first value; that
        # transition's offset is the one in effect. Values earlier than
        # the first known transition are clamped to entry 0.
        # NOTE(review): vals are tz1 wall-clock values compared against UTC
        # transition instants, so a value within one offset of a transition
        # may pick the neighbouring offset -- confirm whether callers need
        # exact handling there.
        pos = max(0, trans.searchsorted(vals[0], side='right') - 1)
        offset = deltas[pos]
        for i in range(n):
            v = vals[i]
            # Step over every transition already passed (there may be
            # several between consecutive values) without running off the
            # end of the table
            while pos + 1 < ntrans and v >= trans[pos + 1]:
                pos += 1
                offset = deltas[pos]
            utc_dates[i] = v - offset
    else:
        utc_dates = vals

    if tz2.zone == 'UTC':
        return utc_dates

    # Convert UTC to other timezone

    result = np.empty(n, dtype=np.int64)
    trans = _get_transitions(tz2)
    deltas = _get_deltas(tz2)
    ntrans = len(trans)
    pos = max(0, trans.searchsorted(utc_dates[0], side='right') - 1)
    offset = deltas[pos]
    for i in range(n):
        v = utc_dates[i]
        while pos + 1 < ntrans and v >= trans[pos + 1]:
            pos += 1
            offset = deltas[pos]
        result[i] = v + offset

    return result
812+
813+
trans_cache = {}
814+
utc_offset_cache = {}
815+
816+
def _get_transitions(object tz):
769817
"""
770818
Get UTC times of DST transitions
771819
"""
772820
if tz not in trans_cache:
773821
arr = np.array(tz._utc_transition_times, dtype='M8[us]')
774-
trans_cache[tz] = np.array(arr.view('i8'))
822+
trans_cache[tz] = arr.view('i8')
775823
return trans_cache[tz]
776824

777-
cdef ndarray[int64_t] _unbox_utcoffsets(object transinfo):
778-
cdef:
779-
Py_ssize_t i, sz
780-
ndarray[int64_t] arr
781-
782-
sz = len(transinfo)
783-
arr = np.empty(sz, dtype='i8')
784-
785-
for i in range(sz):
786-
arr[i] = int(transinfo[i][0].total_seconds()) * 1000000
787-
788-
return arr
789-
790-
cdef int64_t get_utcoffset(object tz, Py_ssize_t idx):
825+
def _get_deltas(object tz):
791826
"""
792827
Get UTC offsets in microseconds corresponding to DST transitions
793828
"""
794-
cdef:
795-
ndarray[int64_t] arr
796829
if tz not in utc_offset_cache:
797830
utc_offset_cache[tz] = _unbox_utcoffsets(tz._transition_info)
798-
arr = utc_offset_cache[tz]
799-
return arr[idx]
800-
801-
def tz_normalize_array(ndarray[int64_t] vals, object tz1, object tz2):
802-
"""
803-
Convert DateRange from one time zone to another (using pytz)
831+
return utc_offset_cache[tz]
804832

805-
Returns
806-
-------
807-
normalized : DateRange
808-
"""
833+
cdef ndarray _unbox_utcoffsets(object transinfo):
809834
cdef:
810-
ndarray[int64_t] result
811-
ndarray[int64_t] trans
812-
Py_ssize_t i, sz, tzidx
813-
int64_t v, tz1offset, tz2offset
814-
815-
if not have_pytz:
816-
raise Exception("Could not find pytz module")
817-
818-
sz = len(vals)
819-
820-
if sz == 0:
821-
return np.empty(0, dtype=np.int64)
822-
823-
result = np.empty(sz, dtype=np.int64)
824-
trans = _get_transitions(tz1)
825-
826-
tzidx = np.searchsorted(trans, vals[0])
835+
Py_ssize_t i, sz
836+
ndarray[int64_t] arr
827837

828-
tz1offset = get_utcoffset(tz1, tzidx)
829-
tz2offset = get_utcoffset(tz2, tzidx)
838+
sz = len(transinfo)
839+
arr = np.empty(sz, dtype='i8')
830840

831841
for i in range(sz):
832-
v = vals[i]
833-
if v >= trans[tzidx + 1]:
834-
tzidx += 1
835-
tz1offset = get_utcoffset(tz1, tzidx)
836-
tz2offset = get_utcoffset(tz2, tzidx)
842+
arr[i] = int(transinfo[i][0].total_seconds()) * 1000000
837843

838-
result[i] = (v - tz1offset) + tz2offset
844+
return arr
839845

840-
return result
841846

842847
def tz_localize_array(ndarray[int64_t] vals, object tz):
843848
"""
@@ -849,43 +854,34 @@ def tz_localize_array(ndarray[int64_t] vals, object tz):
849854
localized : DatetimeIndex
850855
"""
851856
cdef:
852-
ndarray[int64_t] trans
853-
Py_ssize_t i, sz, tzidx
854-
int64_t v, t1, t2, currtrans, tmp
857+
ndarray[int64_t] trans, deltas
858+
Py_ssize_t i, pos, n = len(vals)
859+
int64_t v, t1, t2, tmp
855860

856861
if not have_pytz:
857862
raise Exception("Could not find pytz module")
858863

859864
if tz == pytz.utc or tz is None:
860865
return vals
861866

862-
sz = len(vals)
863-
864-
if sz == 0:
865-
return np.empty(0, dtype=np.int64)
866-
867-
result = np.empty(sz, dtype=np.int64)
868867
trans = _get_transitions(tz)
869-
tzidx = np.searchsorted(trans, vals[0])
868+
deltas = _get_deltas(tz)
870869

871-
currtrans = trans[tzidx]
872-
t1 = currtrans + get_utcoffset(tz, tzidx-1)
873-
t2 = currtrans + get_utcoffset(tz, tzidx)
870+
pos = np.searchsorted(trans, vals[0])
871+
dst_start = trans[pos] + deltas[pos - 1]
872+
dst_end = trans[pos] + deltas[pos]
874873

875-
for i in range(sz):
874+
for i in range(n):
876875
v = vals[i]
877-
if v >= trans[tzidx + 1]:
878-
tzidx += 1
879-
currtrans = trans[tzidx]
880-
t1 = currtrans + get_utcoffset(tz, tzidx-1)
881-
t2 = currtrans + get_utcoffset(tz, tzidx)
882-
883-
if t1 > t2:
884-
tmp = t1
885-
t1 = t2
886-
t2 = tmp
887-
888-
if t1 <= v and v <= t2:
876+
if v >= trans[pos + 1]:
877+
pos += 1
878+
dst_start = trans[pos] + deltas[pos - 1]
879+
dst_end = trans[pos] + deltas[pos]
880+
881+
if dst_start > dst_end:
882+
dst_end, dst_start = dst_start, dst_end
883+
884+
if dst_start <= v and v <= dst_end:
889885
msg = "Cannot localize, ambiguous time %s found" % Timestamp(v)
890886
raise pytz.AmbiguousTimeError(msg)
891887

pandas/src/sandbox.pyx

+88
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,94 @@ from datetime cimport getAbsTime
540540
# int64_t *a
541541

542542

543+
def test_foo(ndarray[int64_t] values):
    """
    Scratch helper: print the first element of an int64 array
    """
    cdef int64_t first = values[0]

    print first
543548

544549
def get_abs_time(freq, dailyDate, originalDate):
545550
return getAbsTime(freq, dailyDate, originalDate)
551+
552+
have_pytz = 1
553+
import pytz
554+
555+
def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
    """
    Convert int64 timestamps from time zone tz1 to time zone tz2 via UTC

    Parameters
    ----------
    vals : ndarray[int64_t]
        Timestamps expressed in tz1, in the units of the cached transition
        tables (microseconds). The transition table is scanned forward
        only, so the values are assumed to be in ascending order.
    tz1 : pytz time zone the values are currently expressed in
    tz2 : pytz time zone to convert to

    Returns
    -------
    converted : ndarray[int64_t]
        For non-empty input, `vals` itself is returned (not a copy) when
        both zones are UTC
    """
    cdef:
        ndarray[int64_t] utc_dates, result, trans, deltas
        Py_ssize_t i, pos, ntrans, n = len(vals)
        int64_t v, offset

    # Debugging aid kept from the sandbox version
    print 'int64 is: %d' % sizeof(int64_t)

    if not have_pytz:
        import pytz

    # vals[0] is read below to find the starting transition
    if n == 0:
        return np.empty(0, dtype=np.int64)

    # Convert to UTC

    if tz1.zone != 'UTC':
        utc_dates = np.empty(n, dtype=np.int64)
        deltas = _get_deltas(tz1)
        trans = _get_transitions(tz1)
        ntrans = len(trans)
        # Index of the last transition at or before the first value; that
        # transition's offset is the one in effect. Values earlier than
        # the first known transition are clamped to entry 0.
        pos = max(0, trans.searchsorted(vals[0], side='right') - 1)
        offset = deltas[pos]
        for i in range(n):
            v = vals[i]
            # Step over every transition already passed without running
            # off the end of the table
            while pos + 1 < ntrans and v >= trans[pos + 1]:
                pos += 1
                offset = deltas[pos]
            utc_dates[i] = v - offset
    else:
        utc_dates = vals

    if tz2.zone == 'UTC':
        return utc_dates

    # Convert UTC to other timezone

    result = np.empty(n, dtype=np.int64)
    trans = _get_transitions(tz2)
    deltas = _get_deltas(tz2)
    ntrans = len(trans)
    # The position must be located in tz2's own table *before* its offset
    # is read; the position left over from the tz1 leg (or never assigned
    # when tz1 is UTC) does not index tz2's transitions
    pos = max(0, trans.searchsorted(utc_dates[0], side='right') - 1)
    offset = deltas[pos]
    for i in range(n):
        v = utc_dates[i]
        while pos + 1 < ntrans and v >= trans[pos + 1]:
            pos += 1
            offset = deltas[pos]
        result[i] = v + offset

    return result
601+
602+
trans_cache = {}
603+
utc_offset_cache = {}
604+
605+
def _get_transitions(object tz):
    """
    Return the UTC transition times of `tz` as an int64 view of a
    microsecond datetime64 array, computing and caching them on first use
    """
    if tz in trans_cache:
        return trans_cache[tz]

    # First request for this zone: convert once and remember the result
    times = np.array(tz._utc_transition_times, dtype='M8[us]')
    trans_cache[tz] = times.view('i8')
    return trans_cache[tz]
613+
614+
def _get_deltas(object tz):
    """
    Return the cached array of UTC offsets (in microseconds) matching the
    transitions of `tz`, building it from tz._transition_info on first use
    """
    if tz in utc_offset_cache:
        return utc_offset_cache[tz]

    # First request for this zone: unbox the offsets once and cache them
    utc_offset_cache[tz] = _unbox_utcoffsets(tz._transition_info)
    return utc_offset_cache[tz]
621+
622+
cdef ndarray _unbox_utcoffsets(object transinfo):
    """
    Build an int64 array holding, for each entry of `transinfo`, the
    total_seconds() of its first item truncated to an int and scaled to
    microseconds
    """
    cdef:
        Py_ssize_t k, count = len(transinfo)
        ndarray[int64_t] offsets = np.empty(count, dtype='i8')

    for k in range(count):
        # Truncate to whole seconds first, then scale to microseconds
        offsets[k] = int(transinfo[k][0].total_seconds()) * 1000000

    return offsets

0 commit comments

Comments
 (0)