Skip to content

Commit 6bac095

Browse files
committed
ENH: Int64Index.union optimization in the monotonic case
1 parent 1371d0f commit 6bac095

File tree

4 files changed

+56
-6
lines changed

4 files changed

+56
-6
lines changed

bench/zoo_bench.R

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
library(zoo)

# Benchmark: time the addition of two zoo series whose indexes are random
# 10-letter strings, the second series covering a random 90000-label subset.
n_obs <- 100000

# One label per observation, each built from 10 letters drawn by sample().
indices <- rep(NA, n_obs)
for (k in seq_len(n_obs)) {
  indices[k] <- paste(sample(letters, 10), collapse = "")
}

timings <- numeric()

x <- zoo(rnorm(n_obs), indices)
y <- zoo(rnorm(90000), indices[sample(seq_len(n_obs), 90000)])

# Ten timed runs of x + y; gc() is called before each run, and the third
# component of system.time()'s result is what gets recorded.
for (run in 1:10) {
  gc()
  timings[run] <- system.time(x + y)[3]
}

mean(timings)

bench/zoo_bench.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from pandas import *
2+
from pandas.util.testing import rands
3+
4+
from la import larry
5+
6+
n = 100000
7+
indices = Index([rands(10) for _ in xrange(n)])
8+
9+
def sample(values, k):
    """Return `k` elements of `values` taken from distinct random positions.

    Parameters
    ----------
    values : object supporting ``len()`` and ``.take(positions)``
        The container to draw from (an Index in this script).
    k : int
        Number of elements to draw.  Because the positions are obtained by
        slicing, a `k` larger than ``len(values)`` yields every element once.

    Returns
    -------
    Whatever ``values.take`` returns for the chosen positions.
    """
    # A single vectorised permutation replaces random.shuffle's per-element
    # Python loop over the ndarray and needs no function-local import.
    positions = np.random.permutation(len(values))[:k]
    return values.take(positions)
14+
15+
subsample_size = 90000

# Size the data from n (the number of labels in `indices`) instead of
# repeating the literal, so values and labels cannot drift apart if n changes.
x = Series(np.random.randn(n), indices)
y = Series(np.random.randn(subsample_size),
           index=sample(indices, subsample_size))

# larry counterparts of x and y; the labels are handed over as a plain list
# wrapped in an outer list.
lx = larry(np.random.randn(n), [list(indices)])
ly = larry(np.random.randn(subsample_size), [list(y.index)])

pandas/core/index.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,17 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
643643
else:
644644
return join_index
645645

646+
def union(self, other):
    # Documentation is inherited from Index.union (see the __doc__
    # assignment below), so this method carries comments only.
    if isinstance(other, Int64Index):
        both_monotonic = self.is_monotonic and other.is_monotonic
        if both_monotonic:
            # Fast path: take the first element of the outer-join result
            # as the combined values.
            joined = lib.outer_join_indexer_int64(self, other)
            combined = joined[0]
        else:
            # General path: concatenate both indexes and let np.unique
            # sort and de-duplicate.
            stacked = np.concatenate((self, other))
            combined = np.unique(stacked)
        return Int64Index(combined)

    # Anything that is not an Int64Index goes through the generic version.
    return Index.union(self, other)
union.__doc__ = Index.union.__doc__
656+
646657
def groupby(self, to_groupby):
    """Group this index via ``lib.groupby_int64``.

    `to_groupby` is passed through unchanged, and the routine's result is
    returned as-is.
    """
    grouped = lib.groupby_int64(self, to_groupby)
    return grouped
648659

scripts/parser_magic.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1+
from pandas.util.testing import set_trace
2+
import pandas.util.testing as tm
3+
4+
from pandas import *
15
import ast
26
import inspect
37
import sys
48

5-
from pandas import *
6-
from pandas.util.testing import set_trace
7-
import pandas.util.testing as tm
8-
99
def merge(a, b):
1010
f, args, _ = parse_stmt(inspect.currentframe().f_back)
1111
return DataFrame({args[0] : a,
@@ -23,7 +23,6 @@ def parse_stmt(frame):
2323
return _parse_call(call)
2424

2525
def _parse_call(call):
26-
set_trace()
2726
func = _maybe_format_attribute(call.func)
2827

2928
str_args = []
@@ -65,4 +64,4 @@ def _format_attribute(attr):
6564

6665
a = tm.makeTimeSeries()
6766
b = tm.makeTimeSeries()
68-
df = merge(a * 2, np.log(np.exp(b)))
67+
df = merge(a, b)

0 commit comments

Comments
 (0)