Skip to content

Commit 808547e

Browse files
committed
REF: working on UTC-only timestamp handling and timezone handling modifications; not yet working
1 parent 83fa1a3 commit 808547e

File tree

7 files changed

+443
-301
lines changed

7 files changed

+443
-301
lines changed

pandas/core/index.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -386,11 +386,13 @@ def asof(self, label):
386386

387387
def asof_locs(self, where, mask):
388388
"""
389+
where : array of timestamps
390+
mask : array of booleans where data is NA
389391
390392
"""
391393
locs = self.values[mask].searchsorted(where.values, side='right')
392-
locs = np.where(locs > 0, locs - 1, 0)
393394

395+
locs = np.where(locs > 0, locs - 1, 0)
394396
result = np.arange(len(self))[mask].take(locs)
395397

396398
first = mask.argmax()

pandas/src/datetime.pyx

+5-10
Original file line numberDiff line numberDiff line change
@@ -788,9 +788,6 @@ try:
788788
except:
789789
have_pytz = False
790790

791-
trans_cache = {}
792-
utc_offset_cache = {}
793-
794791
def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
795792
cdef:
796793
ndarray[int64_t] utc_dates, result, trans, deltas
@@ -839,7 +836,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
839836
trans_cache = {}
840837
utc_offset_cache = {}
841838

842-
def _get_transitions(object tz):
839+
def _get_transitions(tz):
843840
"""
844841
Get UTC times of DST transitions
845842
"""
@@ -848,7 +845,7 @@ def _get_transitions(object tz):
848845
trans_cache[tz] = arr.view('i8')
849846
return trans_cache[tz]
850847

851-
def _get_deltas(object tz):
848+
def _get_deltas(tz):
852849
"""
853850
Get UTC offsets in microseconds corresponding to DST transitions
854851
"""
@@ -860,7 +857,7 @@ cdef double total_seconds(object td): # Python 2.6 compat
860857
return ((td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) //
861858
10**6)
862859

863-
cdef ndarray _unbox_utcoffsets(object transinfo):
860+
cpdef ndarray _unbox_utcoffsets(object transinfo):
864861
cdef:
865862
Py_ssize_t i, sz
866863
ndarray[int64_t] arr
@@ -874,7 +871,7 @@ cdef ndarray _unbox_utcoffsets(object transinfo):
874871
return arr
875872

876873

877-
def tz_localize(ndarray[int64_t] vals, object tz):
874+
def tz_localize_check(ndarray[int64_t] vals, object tz):
878875
"""
879876
Localize tzinfo-naive DateRange to given time zone (using pytz). If
880877
there are ambiguities in the values, raise AmbiguousTimeError.
@@ -892,7 +889,7 @@ def tz_localize(ndarray[int64_t] vals, object tz):
892889
raise Exception("Could not find pytz module")
893890

894891
if tz == pytz.utc or tz is None:
895-
return vals
892+
return
896893

897894
trans = _get_transitions(tz)
898895
deltas = _get_deltas(tz)
@@ -915,8 +912,6 @@ def tz_localize(ndarray[int64_t] vals, object tz):
915912
msg = "Cannot localize, ambiguous time %s found" % Timestamp(v)
916913
raise pytz.AmbiguousTimeError(msg)
917914

918-
return vals
919-
920915

921916
# Accessors
922917
#----------------------------------------------------------------------

pandas/tests/test_algos.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import unittest
2+
3+
import numpy as np
4+
5+
import pandas.core.algorithms as algos
6+
import pandas.util.testing as tm
7+

pandas/tseries/index.py

+130-61
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
from pandas._tseries import Timestamp
1616
import pandas._tseries as lib
1717

18+
def _utc():
    """
    Return the pytz UTC time zone object.

    pytz is imported inside the function, so importing this module does
    not itself import pytz.
    """
    from pytz import utc
    return utc
21+
1822
# -------- some conversion wrapper functions
1923

2024
def _as_i8(arg):
@@ -195,48 +199,8 @@ def __new__(cls, data=None,
195199
"supplied")
196200

197201
if data is None:
198-
_normalized = True
199-
200-
if start is not None:
201-
start = Timestamp(start)
202-
if not isinstance(start, Timestamp):
203-
raise ValueError('Failed to convert %s to timestamp'
204-
% start)
205-
206-
if normalize:
207-
start = normalize_date(start)
208-
_normalized = True
209-
else:
210-
_normalized = _normalized and start.time() == _midnight
211-
212-
if end is not None:
213-
end = Timestamp(end)
214-
if not isinstance(end, Timestamp):
215-
raise ValueError('Failed to convert %s to timestamp'
216-
% end)
217-
218-
if normalize:
219-
end = normalize_date(end)
220-
_normalized = True
221-
else:
222-
_normalized = _normalized and end.time() == _midnight
223-
224-
start, end, tz = tools._figure_out_timezone(start, end, tz)
225-
226-
if (offset._should_cache() and
227-
not (offset._normalize_cache and not _normalized) and
228-
_naive_in_cache_range(start, end)):
229-
index = cls._cached_range(start, end, periods=periods,
230-
offset=offset, name=name)
231-
else:
232-
index = _generate_regular_range(start, end, periods, offset)
233-
234-
index = index.view(cls)
235-
index.name = name
236-
index.offset = offset
237-
index.tz = tz
238-
239-
return index
202+
return cls._generate(start, end, periods, name, offset,
203+
tz=tz, normalize=normalize)
240204

241205
if not isinstance(data, np.ndarray):
242206
if np.isscalar(data):
@@ -292,6 +256,59 @@ def __new__(cls, data=None,
292256

293257
return subarr
294258

259+
@classmethod
def _generate(cls, start, end, periods, name, offset,
              tz=None, normalize=False):
    """
    Build a new index from range endpoints instead of from existing data.

    Parameters
    ----------
    start, end : timestamp-like or None
        Range endpoints; each one that is given is converted to Timestamp
    periods : number of periods, passed through to the range generators
    name : name assigned to the resulting index
    offset : date offset object driving the range generation
    tz : time zone or None
        When a time zone results from _figure_out_timezone, the generated
        values are treated as local times in it and converted to UTC
    normalize : boolean, default False
        If True, pass each given endpoint through normalize_date

    Returns
    -------
    index : instance of cls with name, offset and tz set

    Raises
    ------
    ValueError
        If an endpoint does not come back from Timestamp() as a Timestamp
    """
    # True as long as every endpoint supplied so far sits on midnight
    # (normalized endpoints are taken to qualify)
    on_midnight = True

    if start is not None:
        start = Timestamp(start)
        if not isinstance(start, Timestamp):
            raise ValueError('Failed to convert %s to timestamp'
                             % start)

        if normalize:
            start = normalize_date(start)
        else:
            on_midnight = start.time() == _midnight

    if end is not None:
        end = Timestamp(end)
        if not isinstance(end, Timestamp):
            raise ValueError('Failed to convert %s to timestamp'
                             % end)

        if normalize:
            end = normalize_date(end)
        else:
            on_midnight = on_midnight and end.time() == _midnight

    start, end, tz = tools._figure_out_timezone(start, end, tz)

    # The cached range is usable only if the offset supports caching, the
    # endpoints are on midnight whenever the offset's cache is normalized,
    # and the endpoints fall within the cache range
    use_cache = (offset._should_cache() and
                 (not offset._normalize_cache or on_midnight) and
                 _naive_in_cache_range(start, end))

    if use_cache:
        index = cls._cached_range(start, end, periods=periods,
                                  offset=offset, name=name)
    else:
        index = _generate_regular_range(start, end, periods, offset)

    if tz is not None:
        # Values were generated as local times: check them against tz,
        # then shift to UTC
        local_i8 = index.view('i8')
        lib.tz_localize_check(local_i8, tz)
        utc_i8 = tz_convert(local_i8, tz, _utc())
        index = utc_i8.view('M8[us]')

    result = index.view(cls)
    result.name = name
    result.offset = offset
    result.tz = tz

    return result
311+
295312
@classmethod
296313
def _simple_new(cls, values, name, offset, tz):
297314
result = values.view(cls)
@@ -621,8 +638,8 @@ def _maybe_utc_convert(self, other):
621638
this = self
622639
if isinstance(other, DatetimeIndex):
623640
if self.tz != other.tz:
624-
this = self.tz_normalize('UTC')
625-
other = other.tz_normalize('UTC')
641+
this = self.tz_convert('UTC')
642+
other = other.tz_convert('UTC')
626643
return this, other
627644

628645
def _wrap_joined_index(self, joined, other):
@@ -1029,7 +1046,7 @@ def _view_like(self, ndarray):
10291046
result.name = self.name
10301047
return result
10311048

1032-
def tz_normalize(self, tz):
1049+
def tz_convert(self, tz):
10331050
"""
10341051
Convert DatetimeIndex from one time zone to another (using pytz)
10351052
@@ -1040,16 +1057,10 @@ def tz_normalize(self, tz):
10401057
tz = tools._maybe_get_tz(tz)
10411058

10421059
if self.tz is None:
1043-
new_dates = lib.tz_localize(self.asi8, tz)
1044-
else:
1045-
new_dates = lib.tz_convert(self.asi8, self.tz, tz)
1060+
return self.tz_localize(tz)
10461061

1047-
new_dates = new_dates.view('M8[us]')
1048-
new_dates = new_dates.view(type(self))
1049-
new_dates.offset = self.offset
1050-
new_dates.tz = tz
1051-
new_dates.name = self.name
1052-
return new_dates
1062+
# No conversion since timestamps are all UTC to begin with
1063+
return self._simple_new(self.values, self.name, self.offset, tz)
10531064

10541065
def tz_localize(self, tz):
10551066
"""
@@ -1061,16 +1072,15 @@ def tz_localize(self, tz):
10611072
"""
10621073
if self.tz is not None:
10631074
raise ValueError("Already have timezone info, "
1064-
"use tz_normalize to convert.")
1075+
"use tz_convert to convert.")
10651076
tz = tools._maybe_get_tz(tz)
10661077

1067-
new_dates = lib.tz_localize(self.asi8, tz)
1078+
lib.tz_localize_check(self.asi8, tz)
1079+
1080+
# Convert to UTC
1081+
new_dates = tz_convert(self.asi8, tz, _utc())
10681082
new_dates = new_dates.view('M8[us]')
1069-
new_dates = new_dates.view(self.__class__)
1070-
new_dates.offset = self.offset
1071-
new_dates.tz = tz
1072-
new_dates.name = self.name
1073-
return new_dates
1083+
return self._simple_new(new_dates, self.name, self.offset, tz)
10741084

10751085
def tz_validate(self):
10761086
"""
@@ -1095,6 +1105,65 @@ def tz_validate(self):
10951105

10961106
return True
10971107

1108+
def _offsets_at_transitions(tz, stamps):
    """
    Look up, for every value in stamps, the UTC offset of the most recent
    transition of tz at or before that value.

    Values earlier than the first transition use the first offset, and
    values later than the last transition use the last offset.
    """
    trans = _get_transitions(tz)
    deltas = _get_deltas(tz)
    # side='right' - 1 gives the index of the last transition <= stamp;
    # clip at 0 for stamps that precede every known transition
    pos = trans.searchsorted(stamps, side='right') - 1
    return deltas[np.maximum(pos, 0)]


def tz_convert(vals, tz1, tz2):
    """
    Convert i8 timestamps from time zone tz1 to time zone tz2.

    Parameters
    ----------
    vals : ndarray of int64
        Timestamps expressed in tz1, in the same i8 units as the arrays
        returned by _get_transitions (an i8 view of 'M8[us]' values)
    tz1 : time zone the input values are expressed in
    tz2 : time zone to convert to
        Both zones must have a ``zone`` attribute; zones other than 'UTC'
        must also be accepted by _get_transitions and _get_deltas

    Returns
    -------
    converted : ndarray of int64
        vals itself is returned unchanged when both zones are 'UTC'

    Notes
    -----
    The offset is looked up independently for each element, so the input
    may be empty, need not be sorted, may extend past the last known
    transition, and may cross several transitions between neighbouring
    values.

    NOTE(review): as in the loop-based version, the tz1 -> UTC step compares
    local values directly against UTC transition instants, so values
    within one offset of a transition may pick the neighbouring offset --
    confirm whether that is acceptable.
    """
    # Convert to UTC
    if tz1.zone != 'UTC':
        utc_dates = vals - _offsets_at_transitions(tz1, vals)
    else:
        utc_dates = vals

    if tz2.zone == 'UTC':
        return utc_dates

    # Convert UTC to other timezone
    return utc_dates + _offsets_at_transitions(tz2, utc_dates)
1147+
1148+
# Cache filled by _get_transitions: tz object -> i8 array of its transition times
trans_cache = {}
1149+
# Cache filled by _get_deltas: tz object -> unboxed UTC offsets of its transitions
utc_offset_cache = {}
1150+
1151+
def _get_transitions(tz):
    """
    Return the UTC times of the DST transitions of tz as an i8 array.

    The array is built from tz._utc_transition_times on first use and
    stored in trans_cache, keyed by the tz object.
    """
    try:
        return trans_cache[tz]
    except KeyError:
        pass

    stamps = np.array(tz._utc_transition_times, dtype='M8[us]')
    as_i8 = stamps.view('i8')
    trans_cache[tz] = as_i8
    return as_i8
1159+
1160+
def _get_deltas(tz):
    """
    Return the UTC offsets in microseconds corresponding to the DST
    transitions of tz.

    The offsets are unboxed from tz._transition_info on first use and
    stored in utc_offset_cache, keyed by the tz object.
    """
    try:
        return utc_offset_cache[tz]
    except KeyError:
        pass

    offsets = lib._unbox_utcoffsets(tz._transition_info)
    utc_offset_cache[tz] = offsets
    return offsets
10981167

10991168
def _generate_regular_range(start, end, periods, offset):
11001169
if com._count_not_none(start, end, periods) < 2:

0 commit comments

Comments
 (0)