Skip to content

Commit ae56d6f

Browse files
committed
ENH: at_time/between_time work with tz-localized time series. refactoring and cleanup close #1481
1 parent 6a67e3b commit ae56d6f

File tree

5 files changed

+151
-210
lines changed

5 files changed

+151
-210
lines changed

pandas/core/series.py

+8-12
Original file line numberDiff line numberDiff line change
@@ -2773,26 +2773,23 @@ def _repr_footer(self):
27732773
namestr = "Name: %s, " % str(self.name) if self.name else ""
27742774
return '%s%sLength: %d' % (freqstr, namestr, len(self))
27752775

2776-
def at_time(self, time, tz=None, asof=False):
2776+
def at_time(self, time, asof=False):
27772777
"""
27782778
Select values at particular time of day (e.g. 9:30AM)
27792779
27802780
Parameters
27812781
----------
27822782
time : datetime.time or string
2783-
tz : string or pytz.timezone
2784-
Time zone for time. Corresponding timestamps would be converted to
2785-
time zone of the TimeSeries
27862783
27872784
Returns
27882785
-------
27892786
values_at_time : TimeSeries
27902787
"""
2791-
from pandas.tseries.resample import values_at_time
2792-
return values_at_time(self, time, tz=tz, asof=asof)
2788+
indexer = self.index.indexer_at_time(time, asof=asof)
2789+
return self.take(indexer)
27932790

27942791
def between_time(self, start_time, end_time, include_start=True,
2795-
include_end=True, tz=None):
2792+
include_end=True):
27962793
"""
27972794
Select values between particular times of the day (e.g., 9:00-9:30 AM)
27982795
@@ -2802,16 +2799,15 @@ def between_time(self, start_time, end_time, include_start=True,
28022799
end_time : datetime.time or string
28032800
include_start : boolean, default True
28042801
include_end : boolean, default True
2805-
tz : string or pytz.timezone, default None
28062802
28072803
Returns
28082804
-------
28092805
values_between_time : TimeSeries
28102806
"""
2811-
from pandas.tseries.resample import values_between_time
2812-
return values_between_time(self, start_time, end_time, tz=tz,
2813-
include_start=include_start,
2814-
include_end=include_end)
2807+
indexer = self.index.indexer_between_time(
2808+
start_time, end_time, include_start=include_start,
2809+
include_end=include_end)
2810+
return self.take(indexer)
28152811

28162812
def to_timestamp(self, freq=None, how='start', copy=True):
28172813
"""

pandas/src/datetime.pyx

+20-104
Original file line numberDiff line numberDiff line change
@@ -409,8 +409,7 @@ cdef class _Timestamp(datetime):
409409
return datetime.__sub__(self, other)
410410

411411
cpdef _get_field(self, field):
412-
out = fast_field_accessor(np.array([self.value], dtype=np.int64),
413-
field)
412+
out = get_date_field(np.array([self.value], dtype=np.int64), field)
414413
return out[0]
415414

416415

@@ -1049,8 +1048,26 @@ def build_field_sarray(ndarray[int64_t] dtindex):
10491048

10501049
return out
10511050

1051+
def get_time_micros(ndarray[int64_t] dtindex):
1052+
'''
1053+
Given an int64-based datetime index, return an int64 array holding each timestamp's time of day as microseconds since midnight
1054+
'''
1055+
cdef:
1056+
Py_ssize_t i, n = len(dtindex)
1057+
pandas_datetimestruct dts
1058+
ndarray[int64_t] micros
1059+
1060+
micros = np.empty(n, dtype=np.int64)
1061+
1062+
for i in range(n):
1063+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
1064+
micros[i] = 1000000LL * (dts.hour * 60 * 60 +
1065+
60 * dts.min + dts.sec) + dts.us
1066+
1067+
return micros
1068+
10521069
@cython.wraparound(False)
1053-
def fast_field_accessor(ndarray[int64_t] dtindex, object field):
1070+
def get_date_field(ndarray[int64_t] dtindex, object field):
10541071
'''
10551072
Given a int64-based datetime index, extract the year, month, etc.,
10561073
field and return an array of these values.
@@ -1178,107 +1195,6 @@ cdef inline int m8_weekday(int64_t val):
11781195

11791196
cdef int64_t DAY_NS = 86400000000000LL
11801197

1181-
def values_at_time(ndarray[int64_t] stamps, int64_t time):
1182-
cdef:
1183-
Py_ssize_t i, j, count, n = len(stamps)
1184-
ndarray[int64_t] indexer, times
1185-
int64_t last, cur
1186-
1187-
# Assumes stamps is sorted
1188-
1189-
if len(stamps) == 0:
1190-
return np.empty(0, dtype=np.int64)
1191-
1192-
# is this OK?
1193-
# days = stamps // DAY_NS
1194-
times = stamps % DAY_NS
1195-
1196-
# Nanosecond resolution
1197-
count = 0
1198-
for i in range(n):
1199-
if times[i] == time:
1200-
count += 1
1201-
1202-
indexer = np.empty(count, dtype=np.int64)
1203-
1204-
j = 0
1205-
# last = days[0]
1206-
for i in range(n):
1207-
if times[i] == time:
1208-
indexer[j] = i
1209-
j += 1
1210-
1211-
return indexer
1212-
1213-
def values_between_time(ndarray[int64_t] stamps, int64_t stime, int64_t etime,
1214-
bint include_start, bint include_end):
1215-
cdef:
1216-
Py_ssize_t i, j, count, n = len(stamps)
1217-
ndarray[int64_t] indexer, times
1218-
int64_t last, cur
1219-
1220-
# Assumes stamps is sorted
1221-
1222-
if len(stamps) == 0:
1223-
return np.empty(0, dtype=np.int64)
1224-
1225-
# is this OK?
1226-
# days = stamps // DAY_NS
1227-
times = stamps % DAY_NS
1228-
1229-
# Nanosecond resolution
1230-
count = 0
1231-
if include_start and include_end:
1232-
for i in range(n):
1233-
cur = times[i]
1234-
if cur >= stime and cur <= etime:
1235-
count += 1
1236-
elif include_start:
1237-
for i in range(n):
1238-
cur = times[i]
1239-
if cur >= stime and cur < etime:
1240-
count += 1
1241-
elif include_end:
1242-
for i in range(n):
1243-
cur = times[i]
1244-
if cur > stime and cur <= etime:
1245-
count += 1
1246-
else:
1247-
for i in range(n):
1248-
cur = times[i]
1249-
if cur > stime and cur < etime:
1250-
count += 1
1251-
1252-
indexer = np.empty(count, dtype=np.int64)
1253-
1254-
j = 0
1255-
# last = days[0]
1256-
if include_start and include_end:
1257-
for i in range(n):
1258-
cur = times[i]
1259-
if cur >= stime and cur <= etime:
1260-
indexer[j] = i
1261-
j += 1
1262-
elif include_start:
1263-
for i in range(n):
1264-
cur = times[i]
1265-
if cur >= stime and cur < etime:
1266-
indexer[j] = i
1267-
j += 1
1268-
elif include_end:
1269-
for i in range(n):
1270-
cur = times[i]
1271-
if cur > stime and cur <= etime:
1272-
indexer[j] = i
1273-
j += 1
1274-
else:
1275-
for i in range(n):
1276-
cur = times[i]
1277-
if cur > stime and cur < etime:
1278-
indexer[j] = i
1279-
j += 1
1280-
1281-
return indexer
12821198

12831199
def date_normalize(ndarray[int64_t] stamps):
12841200
cdef:

pandas/tseries/index.py

+102-15
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def f(self):
3939
utc = _utc()
4040
if self.tz is not utc:
4141
values = lib.tz_convert(values, utc, self.tz)
42-
return lib.fast_field_accessor(values, field)
42+
return lib.get_date_field(values, field)
4343
f.__name__ = name
4444
return property(f)
4545

@@ -578,9 +578,23 @@ def asi8(self):
578578
@property
579579
def asstruct(self):
580580
if self._sarr_cache is None:
581-
self._sarr_cache = lib.build_field_sarray(self.asi8)
581+
self._sarr_cache = self._get_field_sarr()
582582
return self._sarr_cache
583583

584+
def _get_field_sarr(self):
585+
utc = _utc()
586+
values = self.asi8
587+
if self.tz is not None and self.tz is not utc:
588+
values = lib.tz_convert(values, utc, self.tz)
589+
return lib.build_field_sarray(values)
590+
591+
def _get_time_micros(self):
592+
utc = _utc()
593+
values = self.asi8
594+
if self.tz is not None and self.tz is not utc:
595+
values = lib.tz_convert(values, utc, self.tz)
596+
return lib.get_time_micros(values)
597+
584598
@property
585599
def asobject(self):
586600
"""
@@ -712,7 +726,7 @@ def take(self, indices, axis=0):
712726
return self[maybe_slice]
713727
indices = com._ensure_platform_int(indices)
714728
taken = self.values.take(indices, axis=axis)
715-
return DatetimeIndex(taken, tz=self.tz, name=self.name)
729+
return self._simple_new(taken, self.name, None, self.tz)
716730

717731
def union(self, other):
718732
"""
@@ -943,7 +957,7 @@ def get_value(self, series, key):
943957
pass
944958

945959
if isinstance(key, time):
946-
locs = self._indices_at_time(key)
960+
locs = self.indexer_at_time(key)
947961
return series.take(locs)
948962

949963
stamp = Timestamp(key)
@@ -969,22 +983,13 @@ def get_loc(self, key):
969983
pass
970984

971985
if isinstance(key, time):
972-
return self._indices_at_time(key)
986+
return self.indexer_at_time(key)
973987

974988
try:
975989
return self._engine.get_loc(Timestamp(key))
976990
except (KeyError, ValueError):
977991
raise KeyError(key)
978992

979-
def _indices_at_time(self, key):
980-
from dateutil.parser import parse
981-
982-
# TODO: time object with tzinfo?
983-
984-
nanos = _time_to_nanosecond(key)
985-
indexer = lib.values_at_time(self.asi8, nanos)
986-
return com._ensure_platform_int(indexer)
987-
988993
def _get_string_slice(self, key):
989994
freq = getattr(self, 'freqstr',
990995
getattr(self, 'inferred_freq', None))
@@ -1246,6 +1251,84 @@ def tz_validate(self):
12461251

12471252
return True
12481253

1254+
def indexer_at_time(self, time, asof=False):
1255+
"""
1256+
Return the integer positions of index values at a particular time of day (e.g. 9:30AM)
1257+
1258+
Parameters
1259+
----------
1260+
time : datetime.time or string
1261+
asof : boolean, default False
1262+
Not implemented; passing True raises NotImplementedError. A time
1263+
carrying tzinfo is likewise not supported (NotImplementedError)
1264+
1265+
Returns
1266+
-------
1267+
indexer : ndarray of integer positions whose time of day equals time
1268+
"""
1269+
from dateutil.parser import parse
1270+
1271+
if asof:
1272+
raise NotImplementedError
1273+
1274+
if isinstance(time, basestring):
1275+
time = parse(time).time()
1276+
1277+
if time.tzinfo:
1278+
# TODO
1279+
raise NotImplementedError
1280+
1281+
time_micros = self._get_time_micros()
1282+
micros = _time_to_micros(time)
1283+
return (micros == time_micros).nonzero()[0]
1284+
1285+
def indexer_between_time(self, start_time, end_time, include_start=True,
1286+
include_end=True):
1287+
"""
1288+
Return the integer positions of index values between particular times of day (e.g., 9:00-9:30AM)
1289+
1290+
Parameters
1291+
----------
1292+
start_time : datetime.time or string
1293+
end_time : datetime.time or string
1294+
include_start : boolean, default True
1295+
include_end : boolean, default True
1296+
(start_time and end_time must not carry tzinfo; otherwise NotImplementedError is raised)
1297+
1298+
Returns
1299+
-------
1300+
indexer : ndarray of integer positions whose time of day falls in the range
1301+
"""
1302+
from dateutil.parser import parse
1303+
1304+
if isinstance(start_time, basestring):
1305+
start_time = parse(start_time).time()
1306+
1307+
if isinstance(end_time, basestring):
1308+
end_time = parse(end_time).time()
1309+
1310+
if start_time.tzinfo or end_time.tzinfo:
1311+
raise NotImplementedError
1312+
1313+
time_micros = self._get_time_micros()
1314+
start_micros = _time_to_micros(start_time)
1315+
end_micros = _time_to_micros(end_time)
1316+
1317+
if include_start and include_end:
1318+
mask = ((start_micros <= time_micros) &
1319+
(time_micros <= end_micros))
1320+
elif include_start:
1321+
mask = ((start_micros <= time_micros) &
1322+
(time_micros < end_micros))
1323+
elif include_end:
1324+
mask = ((start_micros < time_micros) &
1325+
(time_micros <= end_micros))
1326+
else:
1327+
mask = ((start_micros < time_micros) &
1328+
(time_micros < end_micros))
1329+
1330+
return mask.nonzero()[0]
1331+
12491332
def _generate_regular_range(start, end, periods, offset):
12501333
if com._count_not_none(start, end, periods) < 2:
12511334
raise ValueError('Must specify two of start, end, or periods')
@@ -1399,8 +1482,11 @@ def _in_range(start, end, rng_start, rng_end):
13991482
return start > rng_start and end < rng_end
14001483

14011484
def _time_to_nanosecond(time):
1485+
return _time_to_micros(time) * 1000
1486+
1487+
def _time_to_micros(time):
14021488
seconds = time.hour * 60 * 60 + 60 * time.minute + time.second
1403-
return (1000000 * seconds + time.microsecond) * 1000
1489+
return 1000000 * seconds + time.microsecond
14041490

14051491

14061492
def _concat(to_concat):
@@ -1410,3 +1496,4 @@ def _concat(to_concat):
14101496
return new_values.view(_NS_DTYPE)
14111497
else:
14121498
return np.concatenate(to_concat)
1499+

0 commit comments

Comments
 (0)