Skip to content

Commit 75461a2

Browse files
committed
ENH: work toward joins on non-unique indexes, many-to-one support per #1306
1 parent abd1f67 commit 75461a2

File tree

6 files changed: +1317 additions, -393 deletions (lines changed)

pandas/core/index.py

Lines changed: 32 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ class Index(np.ndarray):
5959
# Cython methods
6060
_groupby = _algos.groupby_object
6161
_arrmap = _algos.arrmap_object
62+
_left_indexer_unique = _algos.left_join_indexer_unique_object
6263
_left_indexer = _algos.left_join_indexer_object
6364
_inner_indexer = _algos.inner_join_indexer_object
6465
_outer_indexer = _algos.outer_join_indexer_object
@@ -726,6 +727,7 @@ def _possibly_promote(self, other):
726727
def _get_indexer_standard(self, other):
727728
if (self.dtype != np.object_ and
728729
self.is_monotonic and other.is_monotonic):
730+
# TODO: unique vs non unique
729731
return self._left_indexer(other, self)
730732
else:
731733
return self._engine.get_indexer(other)
@@ -938,21 +940,35 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
938940
else:
939941
return ret_index
940942

941-
if how == 'left':
942-
join_index = self
943-
lidx = None
944-
ridx = self._left_indexer(self, other)
945-
elif how == 'right':
946-
join_index = other
947-
lidx = self._left_indexer(other, self)
948-
ridx = None
949-
elif how == 'inner':
950-
join_index, lidx, ridx = self._inner_indexer(self.values,
951-
other.values)
952-
join_index = self._wrap_joined_index(join_index, other)
953-
elif how == 'outer':
954-
join_index, lidx, ridx = self._outer_indexer(self.values,
955-
other.values)
943+
if self.is_unique and other.is_unique:
944+
# We can perform much better than the general case
945+
if how == 'left':
946+
join_index = self
947+
lidx = None
948+
ridx = self._left_indexer_unique(self, other)
949+
elif how == 'right':
950+
join_index = other
951+
lidx = self._left_indexer_unique(other, self)
952+
ridx = None
953+
elif how == 'inner':
954+
join_index, lidx, ridx = self._inner_indexer(self.values,
955+
other.values)
956+
join_index = self._wrap_joined_index(join_index, other)
957+
elif how == 'outer':
958+
join_index, lidx, ridx = self._outer_indexer(self.values,
959+
other.values)
960+
join_index = self._wrap_joined_index(join_index, other)
961+
else:
962+
if how == 'left':
963+
join_index, lidx, ridx = self._left_indexer(self, other)
964+
elif how == 'right':
965+
join_index, ridx, lidx = self._left_indexer(other, self)
966+
elif how == 'inner':
967+
join_index, lidx, ridx = self._inner_indexer(self.values,
968+
other.values)
969+
elif how == 'outer':
970+
join_index, lidx, ridx = self._outer_indexer(self.values,
971+
other.values)
956972
join_index = self._wrap_joined_index(join_index, other)
957973

958974
if return_indexers:
@@ -1074,6 +1090,7 @@ class Int64Index(Index):
10741090

10751091
_groupby = _algos.groupby_int64
10761092
_arrmap = _algos.arrmap_int64
1093+
_left_indexer_unique = _algos.left_join_indexer_unique_int64
10771094
_left_indexer = _algos.left_join_indexer_int64
10781095
_inner_indexer = _algos.inner_join_indexer_int64
10791096
_outer_indexer = _algos.outer_join_indexer_int64

Comments (0): 0 commit comments