Skip to content

Commit 56e0c9f

Browse files
committed
ENH: Series.align method to leverage faster joins, faster binary ops
1 parent 6bac095 commit 56e0c9f

File tree

3 files changed

+67
-23
lines changed

3 files changed

+67
-23
lines changed

bench/zoo_bench.py

+1
Original file line number | Diff line number | Diff line change
@@ -21,3 +21,4 @@ def sample(values, k):
2121

2222
lx = larry(np.random.randn(100000), [list(indices)])
2323
ly = larry(np.random.randn(subsample_size), [list(y.index)])
24+

pandas/core/index.py

+24-16
Original file line number | Diff line number | Diff line change
@@ -248,21 +248,24 @@ def union(self, other):
248248
if len(self) == 0:
249249
return _ensure_index(other)
250250

251-
indexer = self.get_indexer(other)
252-
indexer = (indexer == -1).nonzero()[0]
253-
254-
if len(indexer) > 0:
255-
other_diff = other.values.take(indexer)
256-
result = list(self) + list(other_diff)
251+
if self.is_monotonic and other.is_monotonic:
252+
result = lib.outer_join_indexer_object(self, other)[0]
257253
else:
258-
# contained in
259-
result = list(self)
254+
indexer = self.get_indexer(other)
255+
indexer = (indexer == -1).nonzero()[0]
260256

261-
# timsort wins
262-
try:
263-
result.sort()
264-
except Exception:
265-
pass
257+
if len(indexer) > 0:
258+
other_diff = other.values.take(indexer)
259+
result = list(self) + list(other_diff)
260+
else:
261+
# contained in
262+
result = list(self)
263+
264+
# timsort wins
265+
try:
266+
result.sort()
267+
except Exception:
268+
pass
266269

267270
# for subclasses
268271
return self._wrap_union_result(other, result)
@@ -292,9 +295,14 @@ def intersection(self, other):
292295
if self.equals(other):
293296
return self
294297

295-
indexer = self.get_indexer(other)
296-
indexer = indexer.take((indexer != -1).nonzero()[0])
297-
return self.take(indexer)
298+
other = _ensure_index(other)
299+
300+
if self.is_monotonic and other.is_monotonic:
301+
return Index(lib.inner_join_indexer_object(self, other)[0])
302+
else:
303+
indexer = self.get_indexer(other)
304+
indexer = indexer.take((indexer != -1).nonzero()[0])
305+
return self.take(indexer)
298306

299307
def diff(self, other):
300308
"""

pandas/core/series.py

+42-7
Original file line number | Diff line number | Diff line change
@@ -41,11 +41,10 @@ def wrapper(self, other):
4141
if self.index.equals(other.index):
4242
return Series(op(self.values, other.values), index=self.index)
4343

44-
new_index = self.index + other.index
45-
this_reindexed = self.reindex(new_index)
46-
other_reindexed = other.reindex(new_index)
44+
this_reindexed, other_reindexed = self.align(other, join='outer',
45+
copy=False)
4746
arr = op(this_reindexed.values, other_reindexed.values)
48-
return Series(arr, index=new_index)
47+
return Series(arr, index=this_reindexed.index)
4948
elif isinstance(other, DataFrame):
5049
return NotImplemented
5150
else:
@@ -955,9 +954,8 @@ def _binop(self, other, func, fill_value=None):
955954
this = self
956955

957956
if not self.index.equals(other.index):
958-
new_index = self.index + other.index
959-
this = self.reindex(new_index)
960-
other = other.reindex(new_index)
957+
this, other = self.align(other, join='outer')
958+
new_index = this.index
961959

962960
this_vals = this.values
963961
other_vals = other.values
@@ -1265,6 +1263,43 @@ def apply(self, func):
12651263
except Exception:
12661264
return Series([func(x) for x in self], index=self.index)
12671265

1266+
def align(self, other, join='outer', copy=True):
1267+
"""
1268+
Align two Series objects with the specified join method
1269+
1270+
Parameters
1271+
----------
1272+
other : Series
1273+
join : {'outer', 'inner', 'left', 'right'}, default 'outer'
copy : boolean, default True
Only consulted for a side whose indexer from the index join is None:
that side's values are copied when True and reused without copying
when False. A side that has an indexer is built with common.take_1d
regardless of this flag.
1274+
1275+
Returns
1276+
-------
1277+
(left, right) : (Series, Series)
1278+
Aligned Series, both constructed on the same joined index
1279+
"""
1280+
join_index, lidx, ridx = self.index.join(other.index, how=join,
1281+
return_indexers=True)
1282+
1283+
if lidx is not None:  # an indexer was returned for self: gather values through it
1284+
left = Series(common.take_1d(self.values, lidx), join_index)
1285+
else:  # no indexer for self: values are paired with join_index unchanged
1286+
if copy:
1287+
new_values = self.values.copy()
1288+
else:
1289+
new_values = self.values  # shares the caller's array with the result
1290+
left = Series(new_values, join_index)
1291+
1292+
if ridx is not None:  # same handling as above, applied to other
1293+
right = Series(common.take_1d(other.values, ridx), join_index)
1294+
else:
1295+
if copy:
1296+
new_values = other.values.copy()
1297+
else:
1298+
new_values = other.values  # shares other's array with the result
1299+
right = Series(new_values, join_index)
1300+
1301+
return left, right
1302+
12681303
def reindex(self, index=None, method=None, copy=True):
12691304
"""Conform Series to new index with optional filling logic, placing
12701305
NA/NaN in locations having no value in the previous index. A new object

0 commit comments

Comments
 (0)