diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index ba5334b2f4fa8..b37ef3e1bec0d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -102,6 +102,7 @@ Other enhancements - :meth:`DataFrame.stack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`) - :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`) - :meth:`DataFrameGroupby.agg` and :meth:`DataFrameGroupby.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`) +- :meth:`Series.str.join` now supports ``ArrowDtype(pa.string())`` (:issue:`53646`) - :meth:`SeriesGroupby.agg` and :meth:`DataFrameGroupby.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`) - :meth:`SeriesGroupby.transform` and :meth:`DataFrameGroupby.transform` now support passing in a string as the function for ``engine="numba"`` (:issue:`53579`) - Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 817d5d0932744..601b418296e7f 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2073,7 +2073,12 @@ def _str_get(self, i: int): return type(self)(result) def _str_join(self, sep: str): - return type(self)(pc.binary_join(self._pa_array, sep)) + if pa.types.is_string(self._pa_array.type): + result = self._apply_elementwise(list) + result = pa.chunked_array(result, type=pa.list_(pa.string())) + else: + result = self._pa_array + return type(self)(pc.binary_join(result, sep)) def _str_partition(self, sep: str, expand: bool): predicate = lambda val: val.partition(sep) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 8625500a83e79..05d6e0e8722c5 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2026,6 +2026,13 @@ def test_str_join(): tm.assert_series_equal(result, expected) +def test_str_join_string_type(): + ser = pd.Series(ArrowExtensionArray(pa.array(["abc", "123", None]))) + result = ser.str.join("=") + expected = pd.Series(["a=b=c", "1=2=3", None], dtype=ArrowDtype(pa.string())) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( "start, stop, step, exp", [