Skip to content

Commit c46c3bf

Browse files
Backport PR #55348 on branch 2.1.x (REGR: join segfaulting for arrow string with nulls) (#55357)
Backport PR #55348: REGR: join segfaulting for arrow string with nulls Co-authored-by: Patrick Hoefler <[email protected]>
1 parent fd76235 commit c46c3bf

File tree

3 files changed

+9
-2
lines changed

3 files changed

+9
-2
lines changed

doc/source/whatsnew/v2.1.2.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ including other versions of pandas.
1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
1616
- Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)
17-
-
17+
- Fixed regression in :meth:`DataFrame.join` where the result has missing values and the dtype is an Arrow-backed string (:issue:`55348`)
1818

1919
.. ---------------------------------------------------------------------------
2020
.. _whatsnew_212.bug_fixes:

pandas/core/reshape/merge.py

+2
Original file line numberDiff line numberDiff line change
@@ -2442,6 +2442,8 @@ def _factorize_keys(
24422442
.astype(np.intp, copy=False),
24432443
len(dc.dictionary),
24442444
)
2445+
if dc.null_count > 0:
2446+
count += 1
24452447
if how == "right":
24462448
return rlab, llab, count
24472449
return llab, rlab, count

pandas/tests/frame/methods/test_join.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,14 @@ def test_join_invalid_validate(left_no_dup, right_no_dup):
158158
left_no_dup.merge(right_no_dup, on="a", validate="invalid")
159159

160160

161-
def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups):
161+
@pytest.mark.parametrize("dtype", ["object", "string[pyarrow]"])
162+
def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups, dtype):
162163
# GH 46622
163164
# Dups on right allowed by one_to_many constraint
165+
if dtype == "string[pyarrow]":
166+
pytest.importorskip("pyarrow")
167+
left_no_dup = left_no_dup.astype(dtype)
168+
right_w_dups.index = right_w_dups.index.astype(dtype)
164169
left_no_dup.join(
165170
right_w_dups,
166171
on="a",

0 commit comments

Comments
 (0)