diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst index 1a25b848e0f84..a5ba365f2d456 100644 --- a/doc/source/whatsnew/v2.1.2.rst +++ b/doc/source/whatsnew/v2.1.2.rst @@ -15,7 +15,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`) - Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`) -- +- Fixed regression in :meth:`DataFrame.join` where result has missing values and dtype is arrow backed string (:issue:`55348`) .. --------------------------------------------------------------------------- .. _whatsnew_212.bug_fixes: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4b9fcc80af4bb..ba6579a739f54 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2443,6 +2443,8 @@ def _factorize_keys( .astype(np.intp, copy=False), len(dc.dictionary), ) + if dc.null_count > 0: + count += 1 if how == "right": return rlab, llab, count return llab, rlab, count diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 2d4ac1d4a4444..3d21faf8b1729 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -158,9 +158,14 @@ def test_join_invalid_validate(left_no_dup, right_no_dup): left_no_dup.merge(right_no_dup, on="a", validate="invalid") -def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups): +@pytest.mark.parametrize("dtype", ["object", "string[pyarrow]"]) +def test_join_on_single_col_dup_on_right(left_no_dup, right_w_dups, dtype): # GH 46622 # Dups on right allowed by one_to_many constraint + if dtype == "string[pyarrow]": + pytest.importorskip("pyarrow") + left_no_dup = left_no_dup.astype(dtype) + right_w_dups.index = right_w_dups.index.astype(dtype) left_no_dup.join( right_w_dups, on="a",