Skip to content

Commit c96306d

Browse files
Albert Villanova del Moral authored and jreback committed
Add sort argument to Index.join
Issue pandas-dev#15582: fix the sort=True bug in DataFrame.join
1 parent 047b513 commit c96306d

File tree

5 files changed: +53 -7 lines changed

5 files changed

+53
-7
lines changed

pandas/indexes/base.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -2831,7 +2831,8 @@ def _reindex_non_unique(self, target):
28312831
new_index = self._shallow_copy_with_infer(new_labels, freq=None)
28322832
return new_index, indexer, new_indexer
28332833

2834-
def join(self, other, how='left', level=None, return_indexers=False):
2834+
def join(self, other, how='left', level=None, return_indexers=False,
2835+
sort=False):
28352836
"""
28362837
*this is an internal non-public method*
28372838
@@ -2844,6 +2845,7 @@ def join(self, other, how='left', level=None, return_indexers=False):
28442845
how : {'left', 'right', 'inner', 'outer'}
28452846
level : int or level name, default None
28462847
return_indexers : boolean, default False
2848+
sort : boolean, default False
28472849
28482850
Returns
28492851
-------
@@ -2929,6 +2931,9 @@ def join(self, other, how='left', level=None, return_indexers=False):
29292931
elif how == 'outer':
29302932
join_index = self.union(other)
29312933

2934+
if sort:
2935+
join_index = join_index.sort_values()
2936+
29322937
if return_indexers:
29332938
if join_index is self:
29342939
lindexer = None

pandas/indexes/range.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,8 @@ def union(self, other):
431431

432432
return self._int64index.union(other)
433433

434-
def join(self, other, how='left', level=None, return_indexers=False):
434+
def join(self, other, how='left', level=None, return_indexers=False,
435+
sort=False):
435436
"""
436437
*this is an internal non-public method*
437438
@@ -444,16 +445,19 @@ def join(self, other, how='left', level=None, return_indexers=False):
444445
how : {'left', 'right', 'inner', 'outer'}
445446
level : int or level name, default None
446447
return_indexers : boolean, default False
448+
sort : boolean, default False
447449
448450
Returns
449451
-------
450452
join_index, (left_indexer, right_indexer)
451453
"""
452454
if how == 'outer' and self is not other:
453455
# note: could return RangeIndex in more circumstances
454-
return self._int64index.join(other, how, level, return_indexers)
456+
return self._int64index.join(other, how, level, return_indexers,
457+
sort)
455458

456-
return super(RangeIndex, self).join(other, how, level, return_indexers)
459+
return super(RangeIndex, self).join(other, how, level, return_indexers,
460+
sort)
457461

458462
def __len__(self):
459463
"""

pandas/tests/frame/test_join.py

+35
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,41 @@ def test_join(self):
4141
index=[0, 1, 2, 3])
4242
tm.assert_frame_equal(result, expected)
4343

44+
def test_join_sort(self):
45+
df1 = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
46+
df2 = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])
47+
48+
result = df1.join(df2, sort=True)
49+
expected = pd.DataFrame({'a': [0, 10, 20], 'b': [None, 100, 200]},
50+
index=[0, 1, 2])
51+
tm.assert_frame_equal(result, expected)
52+
53+
result = df1.join(df2, how='left', sort=True)
54+
expected = pd.DataFrame({'a': [0, 10, 20], 'b': [None, 100, 200]},
55+
index=[0, 1, 2])
56+
tm.assert_frame_equal(result, expected)
57+
58+
result = df1.join(df2, how='right', sort=True)
59+
expected = pd.DataFrame({'a': [10, 20, None], 'b': [100, 200, 300]},
60+
index=[1, 2, 3])
61+
tm.assert_frame_equal(result, expected)
62+
63+
result = df2.join(df1, how='right', sort=True)
64+
expected = pd.DataFrame([[None, 0], [100, 10], [200, 20]],
65+
columns=['b', 'a'], index=[0, 1, 2])
66+
tm.assert_frame_equal(result, expected)
67+
68+
result = df1.join(df2, how='inner', sort=True)
69+
expected = pd.DataFrame({'a': [10, 20], 'b': [100, 200]},
70+
index=[1, 2])
71+
tm.assert_frame_equal(result, expected)
72+
73+
result = df1.join(df2, how='outer', sort=True)
74+
expected = pd.DataFrame({'a': [0, 10, 20, None],
75+
'b': [None, 100, 200, 300]},
76+
index=[0, 1, 2, 3])
77+
tm.assert_frame_equal(result, expected)
78+
4479
def test_join_index(self):
4580
# left / right
4681

pandas/tools/merge.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,8 @@ def _get_join_info(self):
733733

734734
if self.left_index and self.right_index and self.how != 'asof':
735735
join_index, left_indexer, right_indexer = \
736-
left_ax.join(right_ax, how=self.how, return_indexers=True)
736+
left_ax.join(right_ax, how=self.how, return_indexers=True,
737+
sort=self.sort)
737738
elif self.right_index and self.how == 'left':
738739
join_index, left_indexer, right_indexer = \
739740
_left_join_on_index(left_ax, right_ax, self.left_join_keys,

pandas/tseries/index.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1037,7 +1037,8 @@ def union_many(self, others):
10371037
this.offset = to_offset(this.inferred_freq)
10381038
return this
10391039

1040-
def join(self, other, how='left', level=None, return_indexers=False):
1040+
def join(self, other, how='left', level=None, return_indexers=False,
1041+
sort=False):
10411042
"""
10421043
See Index.join
10431044
"""
@@ -1051,7 +1052,7 @@ def join(self, other, how='left', level=None, return_indexers=False):
10511052

10521053
this, other = self._maybe_utc_convert(other)
10531054
return Index.join(this, other, how=how, level=level,
1054-
return_indexers=return_indexers)
1055+
return_indexers=return_indexers, sort=sort)
10551056

10561057
def _maybe_utc_convert(self, other):
10571058
this = self

0 commit comments

Comments
 (0)