Skip to content

Commit b13b04f

Browse files
author
Albert Villanova del Moral
committed
Add sort argument to Index.join
Issue pandas-dev#15582: fix DataFrame.join ignoring sort=True on index-on-index joins by passing the sort argument through to Index.join
1 parent a48aee7 commit b13b04f

File tree

5 files changed

+53
-7
lines changed

5 files changed

+53
-7
lines changed

pandas/indexes/base.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -2788,7 +2788,8 @@ def _reindex_non_unique(self, target):
27882788
new_index = self._shallow_copy_with_infer(new_labels, freq=None)
27892789
return new_index, indexer, new_indexer
27902790

2791-
def join(self, other, how='left', level=None, return_indexers=False):
2791+
def join(self, other, how='left', level=None, return_indexers=False,
2792+
sort=False):
27922793
"""
27932794
*this is an internal non-public method*
27942795
@@ -2801,6 +2802,7 @@ def join(self, other, how='left', level=None, return_indexers=False):
28012802
how : {'left', 'right', 'inner', 'outer'}
28022803
level : int or level name, default None
28032804
return_indexers : boolean, default False
2805+
sort : boolean, default False
28042806
28052807
Returns
28062808
-------
@@ -2886,6 +2888,9 @@ def join(self, other, how='left', level=None, return_indexers=False):
28862888
elif how == 'outer':
28872889
join_index = self.union(other)
28882890

2891+
if sort:
2892+
join_index = join_index.sort_values()
2893+
28892894
if return_indexers:
28902895
if join_index is self:
28912896
lindexer = None

pandas/indexes/range.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,8 @@ def union(self, other):
431431

432432
return self._int64index.union(other)
433433

434-
def join(self, other, how='left', level=None, return_indexers=False):
434+
def join(self, other, how='left', level=None, return_indexers=False,
435+
sort=False):
435436
"""
436437
*this is an internal non-public method*
437438
@@ -444,16 +445,19 @@ def join(self, other, how='left', level=None, return_indexers=False):
444445
how : {'left', 'right', 'inner', 'outer'}
445446
level : int or level name, default None
446447
return_indexers : boolean, default False
448+
sort : boolean, default False
447449
448450
Returns
449451
-------
450452
join_index, (left_indexer, right_indexer)
451453
"""
452454
if how == 'outer' and self is not other:
453455
# note: could return RangeIndex in more circumstances
454-
return self._int64index.join(other, how, level, return_indexers)
456+
return self._int64index.join(other, how, level, return_indexers,
457+
sort)
455458

456-
return super(RangeIndex, self).join(other, how, level, return_indexers)
459+
return super(RangeIndex, self).join(other, how, level, return_indexers,
460+
sort)
457461

458462
def __len__(self):
459463
"""

pandas/tests/frame/test_join.py

+35
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,41 @@ def test_join(self):
4141
index=[0, 1, 2, 3])
4242
tm.assert_frame_equal(result, expected)
4343

44+
def test_join_sort(self):
    """Check ``DataFrame.join(..., sort=True)`` for index-on-index joins.

    Two frames with partially overlapping integer indexes are joined;
    the left frame's index is in descending order. Each case compares
    the joined frame against an expected frame whose index is in
    ascending order.
    """
    left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
    right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])

    # No ``how`` given; the expected frame is the same as for the
    # explicit ``how='left'`` case that follows.
    want = pd.DataFrame({'a': [0, 10, 20], 'b': [None, 100, 200]},
                        index=[0, 1, 2])
    got = left.join(right, sort=True)
    tm.assert_frame_equal(got, want)

    # Explicit left join: keys 0, 1, 2; key 0 has no value for 'b'.
    want = pd.DataFrame({'a': [0, 10, 20], 'b': [None, 100, 200]},
                        index=[0, 1, 2])
    got = left.join(right, how='left', sort=True)
    tm.assert_frame_equal(got, want)

    # Right join: keys 1, 2, 3; key 3 has no value for 'a'.
    want = pd.DataFrame({'a': [10, 20, None], 'b': [100, 200, 300]},
                        index=[1, 2, 3])
    got = left.join(right, how='right', sort=True)
    tm.assert_frame_equal(got, want)

    # Right join called from the other frame: column order is b, a.
    want = pd.DataFrame([[None, 0], [100, 10], [200, 20]],
                        columns=['b', 'a'], index=[0, 1, 2])
    got = right.join(left, how='right', sort=True)
    tm.assert_frame_equal(got, want)

    # Inner join: only the shared keys 1 and 2.
    want = pd.DataFrame({'a': [10, 20], 'b': [100, 200]},
                        index=[1, 2])
    got = left.join(right, how='inner', sort=True)
    tm.assert_frame_equal(got, want)

    # Outer join: all keys 0..3, with a missing value on each side.
    want = pd.DataFrame({'a': [0, 10, 20, None],
                         'b': [None, 100, 200, 300]},
                        index=[0, 1, 2, 3])
    got = left.join(right, how='outer', sort=True)
    tm.assert_frame_equal(got, want)
78+
4479
def test_join_index(self):
4580
# left / right
4681

pandas/tools/merge.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,8 @@ def _get_join_info(self):
729729

730730
if self.left_index and self.right_index and self.how != 'asof':
731731
join_index, left_indexer, right_indexer = \
732-
left_ax.join(right_ax, how=self.how, return_indexers=True)
732+
left_ax.join(right_ax, how=self.how, return_indexers=True,
733+
sort=self.sort)
733734
elif self.right_index and self.how == 'left':
734735
join_index, left_indexer, right_indexer = \
735736
_left_join_on_index(left_ax, right_ax, self.left_join_keys,

pandas/tseries/index.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1036,7 +1036,8 @@ def union_many(self, others):
10361036
this.offset = to_offset(this.inferred_freq)
10371037
return this
10381038

1039-
def join(self, other, how='left', level=None, return_indexers=False):
1039+
def join(self, other, how='left', level=None, return_indexers=False,
1040+
sort=False):
10401041
"""
10411042
See Index.join
10421043
"""
@@ -1050,7 +1051,7 @@ def join(self, other, how='left', level=None, return_indexers=False):
10501051

10511052
this, other = self._maybe_utc_convert(other)
10521053
return Index.join(this, other, how=how, level=level,
1053-
return_indexers=return_indexers)
1054+
return_indexers=return_indexers, sort=sort)
10541055

10551056
def _maybe_utc_convert(self, other):
10561057
this = self

0 commit comments

Comments
 (0)