Skip to content

Commit 71062f6

Browse files
authored
REGR: join segfaulting for arrow string with nulls (#55348)
* REGR: join segfaulting for arrow string with nulls
* Fix test failure when pyarrow is not installed
1 parent 618bf88 commit 71062f6

File tree

3 files changed

+9
-2
lines changed

3 files changed

+9
-2
lines changed

doc/source/whatsnew/v2.1.2.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ Fixed regressions
1515
~~~~~~~~~~~~~~~~~
1616
- Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
1717
- Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)
18-
-
18+
- Fixed regression in :meth:`DataFrame.join` where the result has missing values and the dtype is an Arrow-backed string (:issue:`55348`)
1919

2020
.. ---------------------------------------------------------------------------
2121
.. _whatsnew_212.bug_fixes:

pandas/core/reshape/merge.py

+2
Original file line numberDiff line numberDiff line change
@@ -2443,6 +2443,8 @@ def _factorize_keys(
24432443
.astype(np.intp, copy=False),
24442444
len(dc.dictionary),
24452445
)
2446+
if dc.null_count > 0:
2447+
count += 1
24462448
if how == "right":
24472449
return rlab, llab, count
24482450
return llab, rlab, count

pandas/tests/frame/methods/test_join.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,14 @@ def test_join_invalid_validate(left_no_dup, right_no_dup):
158158
left_no_dup.merge(right_no_dup, on="a", validate="invalid")
159159

160160

161-
def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups):
161+
@pytest.mark.parametrize("dtype", ["object", "string[pyarrow]"])
162+
def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups, dtype):
162163
# GH 46622
163164
# Dups on right allowed by one_to_many constraint
165+
if dtype == "string[pyarrow]":
166+
pytest.importorskip("pyarrow")
167+
left_no_dup = left_no_dup.astype(dtype)
168+
right_w_dups.index = right_w_dups.index.astype(dtype)
164169
left_no_dup.join(
165170
right_w_dups,
166171
on="a",

0 commit comments

Comments
 (0)