Skip to content

Commit 5b6723c

Browse files
authored
BUG: DataFrame.join with left or right empty not respecting sort=True (pandas-dev#56443)
* join with empty not respecting sort param
* whatsnew
1 parent fbd4fcd commit 5b6723c

File tree

6 files changed: +35 -21 lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ These are bug fixes that might have notable behavior changes.
247247

248248
In previous versions of pandas, :func:`merge` and :meth:`DataFrame.join` did not
249249
always return a result that followed the documented sort behavior. pandas now
250-
follows the documented sort behavior in merge and join operations (:issue:`54611`, :issue:`56426`).
250+
follows the documented sort behavior in merge and join operations (:issue:`54611`, :issue:`56426`, :issue:`56443`).
251251

252252
As documented, ``sort=True`` sorts the join keys lexicographically in the resulting
253253
:class:`DataFrame`. With ``sort=False``, the order of the join keys depends on the

pandas/core/indexes/base.py

+23-13
Original file line numberDiff line numberDiff line change
@@ -4576,9 +4576,6 @@ def join(
45764576
pother, how=how, level=level, return_indexers=True, sort=sort
45774577
)
45784578

4579-
lindexer: np.ndarray | None
4580-
rindexer: np.ndarray | None
4581-
45824579
# try to figure out the join level
45834580
# GH3662
45844581
if level is None and (self._is_multi or other._is_multi):
@@ -4592,25 +4589,38 @@ def join(
45924589
if level is not None and (self._is_multi or other._is_multi):
45934590
return self._join_level(other, level, how=how)
45944591

4592+
lidx: np.ndarray | None
4593+
ridx: np.ndarray | None
4594+
45954595
if len(other) == 0:
45964596
if how in ("left", "outer"):
4597-
join_index = self._view()
4598-
rindexer = np.broadcast_to(np.intp(-1), len(join_index))
4599-
return join_index, None, rindexer
4597+
if sort and not self.is_monotonic_increasing:
4598+
lidx = self.argsort()
4599+
join_index = self.take(lidx)
4600+
else:
4601+
lidx = None
4602+
join_index = self._view()
4603+
ridx = np.broadcast_to(np.intp(-1), len(join_index))
4604+
return join_index, lidx, ridx
46004605
elif how in ("right", "inner", "cross"):
46014606
join_index = other._view()
4602-
lindexer = np.array([])
4603-
return join_index, lindexer, None
4607+
lidx = np.array([], dtype=np.intp)
4608+
return join_index, lidx, None
46044609

46054610
if len(self) == 0:
46064611
if how in ("right", "outer"):
4607-
join_index = other._view()
4608-
lindexer = np.broadcast_to(np.intp(-1), len(join_index))
4609-
return join_index, lindexer, None
4612+
if sort and not other.is_monotonic_increasing:
4613+
ridx = other.argsort()
4614+
join_index = other.take(ridx)
4615+
else:
4616+
ridx = None
4617+
join_index = other._view()
4618+
lidx = np.broadcast_to(np.intp(-1), len(join_index))
4619+
return join_index, lidx, ridx
46104620
elif how in ("left", "inner", "cross"):
46114621
join_index = self._view()
4612-
rindexer = np.array([])
4613-
return join_index, None, rindexer
4622+
ridx = np.array([], dtype=np.intp)
4623+
return join_index, None, ridx
46144624

46154625
if self.dtype != other.dtype:
46164626
dtype = self._find_common_type_compat(other)

pandas/tests/frame/methods/test_combine_first.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def test_combine_first(self, float_frame, using_infer_string):
8282
tm.assert_frame_equal(comb, float_frame)
8383

8484
comb = DataFrame().combine_first(float_frame)
85-
tm.assert_frame_equal(comb, float_frame)
85+
tm.assert_frame_equal(comb, float_frame.sort_index())
8686

8787
comb = float_frame.combine_first(DataFrame(index=["faz", "boo"]))
8888
assert "faz" in comb.index

pandas/tests/frame/test_arithmetic.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -629,10 +629,12 @@ def test_arith_flex_frame_corner(self, float_frame):
629629

630630
# corner cases
631631
result = float_frame.add(float_frame[:0])
632-
tm.assert_frame_equal(result, float_frame * np.nan)
632+
expected = float_frame.sort_index() * np.nan
633+
tm.assert_frame_equal(result, expected)
633634

634635
result = float_frame[:0].add(float_frame)
635-
tm.assert_frame_equal(result, float_frame * np.nan)
636+
expected = float_frame.sort_index() * np.nan
637+
tm.assert_frame_equal(result, expected)
636638

637639
with pytest.raises(NotImplementedError, match="fill_value"):
638640
float_frame.add(float_frame.iloc[0], fill_value=3)

pandas/tests/reshape/merge/test_join.py

+2
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,8 @@ def test_join_empty(left_empty, how, exp):
10231023
expected = DataFrame(columns=["B", "C"], dtype="int64")
10241024
if how != "cross":
10251025
expected = expected.rename_axis("A")
1026+
if how == "outer":
1027+
expected = expected.sort_index()
10261028

10271029
tm.assert_frame_equal(result, expected)
10281030

pandas/tests/series/test_logical_ops.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ def test_logical_operators_bool_dtype_with_empty(self):
4040
s_empty = Series([], dtype=object)
4141

4242
res = s_tft & s_empty
43-
expected = s_fff
43+
expected = s_fff.sort_index()
4444
tm.assert_series_equal(res, expected)
4545

4646
res = s_tft | s_empty
47-
expected = s_tft
47+
expected = s_tft.sort_index()
4848
tm.assert_series_equal(res, expected)
4949

5050
def test_logical_operators_int_dtype_with_int_dtype(self):
@@ -397,11 +397,11 @@ def test_logical_ops_label_based(self, using_infer_string):
397397
empty = Series([], dtype=object)
398398

399399
result = a & empty.copy()
400-
expected = Series([False, False, False], list("bca"))
400+
expected = Series([False, False, False], list("abc"))
401401
tm.assert_series_equal(result, expected)
402402

403403
result = a | empty.copy()
404-
expected = Series([True, False, True], list("bca"))
404+
expected = Series([True, True, False], list("abc"))
405405
tm.assert_series_equal(result, expected)
406406

407407
# vs non-matching

0 commit comments

Comments
 (0)