diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst index 6fec66ec8d556..158cb51f05316 100644 --- a/doc/source/whatsnew/v2.1.2.rst +++ b/doc/source/whatsnew/v2.1.2.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`) -- +- Fixed regression in :meth:`DataFrame.join` where result has missing values and dtype is arrow backed string (:issue:`55348`) .. --------------------------------------------------------------------------- .. _whatsnew_212.bug_fixes: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index d36ceff800c56..74181f8dc853c 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2442,6 +2442,8 @@ def _factorize_keys( .astype(np.intp, copy=False), len(dc.dictionary), ) + if dc.null_count > 0: + count += 1 if how == "right": return rlab, llab, count return llab, rlab, count diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 98f3926968ad0..3b6b86056f8c7 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -158,9 +158,14 @@ def test_join_invalid_validate(left_no_dup, right_no_dup): left_no_dup.merge(right_no_dup, on="a", validate="invalid") -def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups): +@pytest.mark.parametrize("dtype", ["object", "string[pyarrow]"]) +def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups, dtype): # GH 46622 # Dups on right allowed by one_to_many constraint + if dtype == "string[pyarrow]": + pytest.importorskip("pyarrow") + left_no_dup = left_no_dup.astype(dtype) + right_w_dups.index = right_w_dups.index.astype(dtype) left_no_dup.join( right_w_dups, on="a",