Skip to content

Commit 191200f

Browse files
committed
ENH: fast DateRange.intersection in some cases and sped up DateRange.union in some cases. address GH #178
1 parent aaea503 commit 191200f

File tree

3 files changed

+99
-7
lines changed

3 files changed

+99
-7
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ This is an incremental bug fix and performance enhancement release
5353
- Throw exception when step specified in label-based slice (GH #185)
5454
- Fix isnull to correctly work with np.float32. Fix upstream bug described in
5555
GH #182
56+
- Finish implementation of as_index=False in groupby for DataFrame
57+
aggregation (GH #181)
5658

5759
pandas 0.4.1
5860
============

pandas/core/daterange.py

+66-7
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,7 @@ def __new__(cls, start=None, end=None, periods=None,
6666
end = datetools.to_datetime(end)
6767

6868
# inside cache range. Handle UTC case
69-
70-
useCache = (offset.isAnchored() and
71-
isinstance(offset, datetools.CacheableOffset))
69+
useCache = _will_use_cache(offset)
7270

7371
start, end, tzinfo = _figure_out_timezone(start, end, tzinfo)
7472
useCache = useCache and _naive_in_cache_range(start, end)
@@ -285,16 +283,72 @@ def union(self, other):
285283
else:
286284
left, right = other, self
287285

288-
left_start, left_end = left[0], left[-1]
289-
right_start, right_end = right[0], right[-1]
286+
left_end = left[-1]
287+
right_start = right[0]
290288

291289
# Only need to "adjoin", not overlap
292290
if (left_end + offset) >= right_start:
293-
return DateRange(left_start, max(left_end, right_end),
294-
offset=offset)
291+
return left._fast_union(right)
295292
else:
296293
return Index.union(self, other)
297294

295+
def intersection(self, other):
    """
    Specialized intersection for DateRange objects. May be much faster than
    Index.intersection

    Parameters
    ----------
    other : DateRange or array-like

    Returns
    -------
    y : Index or DateRange
        A DateRange when `other` is a DateRange with an equal offset and
        the two ranges overlap; an empty Index when they do not overlap
        (or either is empty); otherwise the result of Index.intersection.
    """
    if not isinstance(other, DateRange) or other.offset != self.offset:
        return Index.intersection(self.view(Index), other)

    # an empty range has no first/last element to compare against
    if len(self) == 0 or len(other) == 0:
        return Index([])

    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    start = right[0]
    # The overlap stops at whichever range ends first. Slicing left all the
    # way to its own end is wrong when right lies strictly inside left.
    end = min(left[-1], right[-1])

    if end < start:
        return Index([])

    # NOTE(review): slicing left assumes both ranges sit on the same offset
    # grid, i.e. every date of left in [start, end] is also in right.
    lslice = slice(*left.slice_locs(start, end))
    left_chunk = left.values[lslice]
    return self._view_like(left_chunk)
326+
327+
def _fast_union(self, other):
    """
    Union of two DateRanges without going through the generic Index.union.

    union() invokes this as left._fast_union(right), i.e. `self` is the
    range that starts first and `other` adjoins or overlaps it.

    Returns
    -------
    y : DateRange
    """
    first_date, self_last = self[0], self[-1]
    other_last = other[-1]

    if _will_use_cache(self.offset):
        # cacheable offset: just construct the spanning range
        return DateRange(first_date, max(self_last, other_last),
                         offset=self.offset)

    # non-cacheable offset: concatenate dates
    if self_last < other_last:
        # keep only the part of other that lies past the end of self
        cut = other.searchsorted(self_last, side='right')
        tail = other.values[cut:]
        return self._view_like(np.concatenate((self.values, tail)))

    # other does not extend past self, nothing to append
    return self
345+
346+
def _view_like(self, ndarray):
    """
    View `ndarray` as a DateRange and copy this range's offset and tzinfo
    onto the view.
    """
    offset, tzinfo = self.offset, self.tzinfo
    viewed = ndarray.view(DateRange)
    viewed.offset = offset
    viewed.tzinfo = tzinfo
    return viewed
351+
298352
def _wrap_union_result(self, other, result):
    """Wrap a union result in a plain Index; `other` is not used."""
    wrapped = Index(result)
    return wrapped
300354

@@ -484,6 +538,11 @@ def _infer(a, b):
484538
tz = _infer(end, start)
485539
return tz
486540

541+
def _will_use_cache(offset):
    """
    Return a truthy value when `offset` is anchored and is a
    datetools.CacheableOffset; DateRange uses this to decide on caching.
    """
    anchored = offset.isAnchored()
    if not anchored:
        # hand back the falsy value itself, exactly as `and` would
        return anchored
    return isinstance(offset, datetools.CacheableOffset)
544+
545+
487546
if __name__ == '__main__':
488547
import pytz
489548
# just want it to work

pandas/tests/test_daterange.py

+31
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,37 @@ def test_union(self):
162162
the_union = self.rng.union(rng)
163163
self.assert_(not isinstance(the_union, DateRange))
164164

165+
def test_union_not_cacheable(self):
    # Minute offsets serve as the non-cacheable case (per the test name)
    minutes = DateRange('1/1/2000', periods=50, offset=datetools.Minute())

    # two overlapping pieces that together span the whole range
    tail, head = minutes[10:], minutes[:25]
    self.assert_(tail.union(head).equals(minutes))

    # second piece lies entirely inside the first
    outer, inner = minutes[10:], minutes[15:35]
    combined = outer.union(inner)
    self.assert_(combined.equals(minutes[10:]))
177+
178+
def test_intersection(self):
    minutes = DateRange('1/1/2000', periods=50, offset=datetools.Minute())
    tail, head = minutes[10:], minutes[:25]
    overlap = minutes[10:25]

    # DateRange with DateRange: result stays a DateRange, same offset
    result = tail.intersection(head)
    self.assert_(result.equals(overlap))
    self.assert_(isinstance(result, DateRange))
    self.assert_(result.offset == minutes.offset)

    # DateRange with a plain Index view of the other range
    result = tail.intersection(head.view(Index))
    self.assert_(result.equals(overlap))

    # non-overlapping
    result = minutes[:10].intersection(minutes[10:])
    self.assert_(result.equals(Index([])))
195+
165196
def test_with_tzinfo(self):
166197
_skip_if_no_pytz()
167198
tz = pytz.timezone('US/Central')

0 commit comments

Comments
 (0)