From 0349837e3122650ad2c5a9f978a89fdbb0782c3d Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Thu, 25 Apr 2024 19:24:44 +0800
Subject: [PATCH 1/9] Add test

---
 pandas/tests/extension/test_arrow.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 79440b55dd5dd..c9cf264b1e4f8 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2296,6 +2296,16 @@ def test_str_split_pat_none(method):
     tm.assert_series_equal(result, expected)
 
 
+def test_str_split_regex_none():
+    # GH 58321
+    ser = pd.Series(["230/270/270", "240-290-290"], dtype=ArrowDtype(pa.string()))
+    result = ser.str.split(r"/|-", regex=None)
+    expected = pd.Series(
+        ArrowExtensionArray(pa.array([["230", "270", "270"], ["240", "290", "290"]]))
+    )
+    tm.assert_series_equal(result, expected)
+
+
 def test_str_split():
     # GH 52401
     ser = pd.Series(["a1cbcb", "a2cbcb", None], dtype=ArrowDtype(pa.string()))

From 4948784959a2f4840e291aeb34874ffaa94a6295 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Thu, 25 Apr 2024 19:34:04 +0800
Subject: [PATCH 2/9] Fix

---
 pandas/core/arrays/arrow/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 1154130b9bed3..47bcd73e270aa 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2579,7 +2579,7 @@ def _str_split(
             n = None
         if pat is None:
             split_func = pc.utf8_split_whitespace
-        elif regex:
+        elif regex or (regex is None and len(pat) != 1):
             split_func = functools.partial(pc.split_pattern_regex, pattern=pat)
         else:
             split_func = functools.partial(pc.split_pattern, pattern=pat)

From 14c059ce428f728b85c93d3b2d8cdbc963662cb6 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Thu, 25 Apr 2024 19:34:56 +0800
Subject: [PATCH 3/9] Add whatsnew

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index c77348b365370..e4bb4b0c5604a 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -391,6 +391,7 @@ Conversion
 
 Strings
 ^^^^^^^
+- Bug in :meth:`Series.str.split` would not treat ``pat`` as regex when ``regex=None`` for series having ``pd.ArrowDtype(pa.string())`` dtype (:issue:`58321`)
 - Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
 -
 

From f0c2097aba28cbe5df1a0c9dc10a86d5fa582826 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Tue, 30 Apr 2024 16:33:55 +0800
Subject: [PATCH 4/9] Move logic outside

---
 pandas/core/arrays/arrow/array.py   | 2 +-
 pandas/core/strings/accessor.py     | 3 +++
 pandas/core/strings/object_array.py | 8 +-------
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index b0be934576b61..0240433cdb683 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2581,7 +2581,7 @@ def _str_split(
             n = None
         if pat is None:
             split_func = pc.utf8_split_whitespace
-        elif regex or (regex is None and len(pat) != 1):
+        elif regex:
             split_func = functools.partial(pc.split_pattern_regex, pattern=pat)
         else:
             split_func = functools.partial(pc.split_pattern, pattern=pat)
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index d274c1d7a5aff..cb1fae32d4c7f 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -910,6 +910,9 @@ def split(
             )
         if is_re(pat):
             regex = True
+        elif pat is not None and regex is None:
+            # regex is None so link to old behavior #43563
+            regex = len(pat) != 1
         result = self._data.array._str_split(pat, n, expand, regex)
         if self._data.dtype == "category":
             dtype = self._data.dtype.categories.dtype
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index bdcf55e61d2d1..91b9e544f72cf 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -339,14 +339,8 @@ def _str_split(
             new_pat: str | re.Pattern
             if regex is True or isinstance(pat, re.Pattern):
                 new_pat = re.compile(pat)
-            elif regex is False:
-                new_pat = pat
-            # regex is None so link to old behavior #43563
             else:
-                if len(pat) == 1:
-                    new_pat = pat
-                else:
-                    new_pat = re.compile(pat)
+                new_pat = pat
 
             if isinstance(new_pat, re.Pattern):
                 if n is None or n == -1:

From 6f93a8d9a8d50acbbf055b328cc51ccce2018a11 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Tue, 30 Apr 2024 16:40:59 +0800
Subject: [PATCH 5/9] Move test

---
 pandas/conftest.py                           | 20 +++++++++++-
 pandas/tests/extension/test_arrow.py         | 10 ------
 pandas/tests/strings/test_split_partition.py | 32 +++++++++++++++-----
 3 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 21100178262c8..f1cd91b220bd2 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -92,7 +92,6 @@
 except ImportError:
     has_pyarrow = False
 else:
-    del pa
     has_pyarrow = True
 
 import zoneinfo
@@ -1367,6 +1366,25 @@ def any_string_dtype(request):
     return request.param
 
 
+@pytest.fixture(
+    params=[
+        "object",
+        "string[python]",
+        pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
+        pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
+        pytest.param(pd.ArrowDtype(pa.string()), marks=td.skip_if_no("pyarrow")),
+    ]
+)
+def any_string_dtype_2(request):
+    """
+    Parametrized fixture for string dtypes.
+    * 'object'
+    * 'string[python]'
+    * 'string[pyarrow]'
+    """
+    return request.param
+
+
 @pytest.fixture(params=tm.DATETIME64_DTYPES)
 def datetime64_dtype(request):
     """
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 5b66b40357fe8..7d31fe6085c3a 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2296,16 +2296,6 @@ def test_str_split_pat_none(method):
     tm.assert_series_equal(result, expected)
 
 
-def test_str_split_regex_none():
-    # GH 58321
-    ser = pd.Series(["230/270/270", "240-290-290"], dtype=ArrowDtype(pa.string()))
-    result = ser.str.split(r"/|-", regex=None)
-    expected = pd.Series(
-        ArrowExtensionArray(pa.array([["230", "270", "270"], ["240", "290", "290"]]))
-    )
-    tm.assert_series_equal(result, expected)
-
-
 def test_str_split():
     # GH 52401
     ser = pd.Series(["a1cbcb", "a2cbcb", None], dtype=ArrowDtype(pa.string()))
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 452e5ec5cf939..53c50304cc8a0 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -17,6 +17,10 @@
     object_pyarrow_numpy,
 )
 
+pa = pytest.importorskip("pyarrow")
+
+from pandas.core.arrays.arrow.array import ArrowExtensionArray
+
 
 @pytest.mark.parametrize("method", ["split", "rsplit"])
 def test_split(any_string_dtype, method):
@@ -59,27 +63,39 @@ def test_split_regex(any_string_dtype):
     tm.assert_series_equal(result, exp)
 
 
-def test_split_regex_explicit(any_string_dtype):
+def test_split_regex_explicit(any_string_dtype_2):
     # explicit regex = True split with compiled regex
     regex_pat = re.compile(r".jpg")
-    values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype)
-    result = values.str.split(regex_pat)
-    exp = Series([["xx", "zzz", ""]])
-    tm.assert_series_equal(result, exp)
+    values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype_2)
+
+    if not isinstance(any_string_dtype_2, pd.ArrowDtype):
+        # ArrowDtype does not support compiled regex
+        result = values.str.split(regex_pat)
+        exp = Series([["xx", "zzz", ""]])
+        tm.assert_series_equal(result, exp)
 
     # explicit regex = False split
     result = values.str.split(r"\.jpg", regex=False)
-    exp = Series([["xxxjpgzzz.jpg"]])
+    if not isinstance(any_string_dtype_2, pd.ArrowDtype):
+        exp = Series([["xxxjpgzzz.jpg"]])
+    else:
+        exp = Series(ArrowExtensionArray(pa.array([["xxxjpgzzz.jpg"]])))
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length == 1
     result = values.str.split(r".")
-    exp = Series([["xxxjpgzzz", "jpg"]])
+    if not isinstance(any_string_dtype_2, pd.ArrowDtype):
+        exp = Series([["xxxjpgzzz", "jpg"]])
+    else:
+        exp = Series(ArrowExtensionArray(pa.array([["xxxjpgzzz", "jpg"]])))
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length != 1
     result = values.str.split(r".jpg")
-    exp = Series([["xx", "zzz", ""]])
+    if not isinstance(any_string_dtype_2, pd.ArrowDtype):
+        exp = Series([["xx", "zzz", ""]])
+    else:
+        exp = Series(ArrowExtensionArray(pa.array([["xx", "zzz", ""]])))
     tm.assert_series_equal(result, exp)
 
     # regex=False with pattern compiled regex raises error

From b9b3197e22f638ed0ce5cbe89ed9aa4b8aeafefa Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Sun, 12 May 2024 23:39:05 +0800
Subject: [PATCH 6/9] Revert "Move test"

This reverts commit 6f93a8d9a8d50acbbf055b328cc51ccce2018a11.
---
 pandas/conftest.py                           | 20 +-----------
 pandas/tests/extension/test_arrow.py         | 10 ++++++
 pandas/tests/strings/test_split_partition.py | 32 +++++---------------
 3 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index f1cd91b220bd2..21100178262c8 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -92,6 +92,7 @@
 except ImportError:
     has_pyarrow = False
 else:
+    del pa
     has_pyarrow = True
 
 import zoneinfo
@@ -1366,25 +1367,6 @@ def any_string_dtype(request):
     return request.param
 
 
-@pytest.fixture(
-    params=[
-        "object",
-        "string[python]",
-        pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
-        pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
-        pytest.param(pd.ArrowDtype(pa.string()), marks=td.skip_if_no("pyarrow")),
-    ]
-)
-def any_string_dtype_2(request):
-    """
-    Parametrized fixture for string dtypes.
-    * 'object'
-    * 'string[python]'
-    * 'string[pyarrow]'
-    """
-    return request.param
-
-
 @pytest.fixture(params=tm.DATETIME64_DTYPES)
 def datetime64_dtype(request):
     """
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 7d31fe6085c3a..5b66b40357fe8 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2296,6 +2296,16 @@ def test_str_split_pat_none(method):
     tm.assert_series_equal(result, expected)
 
 
+def test_str_split_regex_none():
+    # GH 58321
+    ser = pd.Series(["230/270/270", "240-290-290"], dtype=ArrowDtype(pa.string()))
+    result = ser.str.split(r"/|-", regex=None)
+    expected = pd.Series(
+        ArrowExtensionArray(pa.array([["230", "270", "270"], ["240", "290", "290"]]))
+    )
+    tm.assert_series_equal(result, expected)
+
+
 def test_str_split():
     # GH 52401
     ser = pd.Series(["a1cbcb", "a2cbcb", None], dtype=ArrowDtype(pa.string()))
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 53c50304cc8a0..452e5ec5cf939 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -17,10 +17,6 @@
     object_pyarrow_numpy,
 )
 
-pa = pytest.importorskip("pyarrow")
-
-from pandas.core.arrays.arrow.array import ArrowExtensionArray
-
 
 @pytest.mark.parametrize("method", ["split", "rsplit"])
 def test_split(any_string_dtype, method):
@@ -63,39 +59,27 @@ def test_split_regex(any_string_dtype):
     tm.assert_series_equal(result, exp)
 
 
-def test_split_regex_explicit(any_string_dtype_2):
+def test_split_regex_explicit(any_string_dtype):
     # explicit regex = True split with compiled regex
     regex_pat = re.compile(r".jpg")
-    values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype_2)
-
-    if not isinstance(any_string_dtype_2, pd.ArrowDtype):
-        # ArrowDtype does not support compiled regex
-        result = values.str.split(regex_pat)
-        exp = Series([["xx", "zzz", ""]])
-        tm.assert_series_equal(result, exp)
+    values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype)
+    result = values.str.split(regex_pat)
+    exp = Series([["xx", "zzz", ""]])
+    tm.assert_series_equal(result, exp)
 
     # explicit regex = False split
     result = values.str.split(r"\.jpg", regex=False)
-    if not isinstance(any_string_dtype_2, pd.ArrowDtype):
-        exp = Series([["xxxjpgzzz.jpg"]])
-    else:
-        exp = Series(ArrowExtensionArray(pa.array([["xxxjpgzzz.jpg"]])))
+    exp = Series([["xxxjpgzzz.jpg"]])
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length == 1
     result = values.str.split(r".")
-    if not isinstance(any_string_dtype_2, pd.ArrowDtype):
-        exp = Series([["xxxjpgzzz", "jpg"]])
-    else:
-        exp = Series(ArrowExtensionArray(pa.array([["xxxjpgzzz", "jpg"]])))
+    exp = Series([["xxxjpgzzz", "jpg"]])
     tm.assert_series_equal(result, exp)
 
     # non explicit regex split, pattern length != 1
     result = values.str.split(r".jpg")
-    if not isinstance(any_string_dtype_2, pd.ArrowDtype):
-        exp = Series([["xx", "zzz", ""]])
-    else:
-        exp = Series(ArrowExtensionArray(pa.array([["xx", "zzz", ""]])))
+    exp = Series([["xx", "zzz", ""]])
     tm.assert_series_equal(result, exp)
 
     # regex=False with pattern compiled regex raises error

From d9199577110e9cbad308640d3bd2a8b3c4e0d049 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Sun, 12 May 2024 23:49:14 +0800
Subject: [PATCH 7/9] Add test

---
 pandas/tests/extension/test_arrow.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 5b66b40357fe8..3a5493529808d 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2296,14 +2296,25 @@ def test_str_split_pat_none(method):
     tm.assert_series_equal(result, expected)
 
 
-def test_str_split_regex_none():
+def test_str_split_regex_explicit():
     # GH 58321
-    ser = pd.Series(["230/270/270", "240-290-290"], dtype=ArrowDtype(pa.string()))
-    result = ser.str.split(r"/|-", regex=None)
-    expected = pd.Series(
-        ArrowExtensionArray(pa.array([["230", "270", "270"], ["240", "290", "290"]]))
-    )
-    tm.assert_series_equal(result, expected)
+    # adapted from tests/strings/test_split_partition.py
+    values = pd.Series("xxxjpgzzz.jpg", dtype=ArrowDtype(pa.string()))
+
+    # explicit regex = False split
+    result = values.str.split(r"\.jpg", regex=False)
+    exp = pd.Series(ArrowExtensionArray(pa.array([["xxxjpgzzz.jpg"]])))
+    tm.assert_series_equal(result, exp)
+
+    # non explicit regex split, pattern length == 1
+    result = values.str.split(r".")
+    exp = pd.Series(ArrowExtensionArray(pa.array([["xxxjpgzzz", "jpg"]])))
+    tm.assert_series_equal(result, exp)
+
+    # non explicit regex split, pattern length != 1
+    result = values.str.split(r".jpg")
+    exp = pd.Series(ArrowExtensionArray(pa.array([["xx", "zzz", ""]])))
+    tm.assert_series_equal(result, exp)
 
 
 def test_str_split():

From 20e5ebe063708d7e501ca5edf67e32d813942049 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Mon, 3 Jun 2024 13:24:10 +0800
Subject: [PATCH 8/9] pass mypy type check

---
 pandas/core/strings/accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index d08e396441907..81a703c889370 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -920,7 +920,7 @@ def split(
             )
         if is_re(pat):
             regex = True
-        elif pat is not None and regex is None:
+        elif isinstance(pat, str) and regex is None:
             # regex is None so link to old behavior #43563
             regex = len(pat) != 1
         result = self._data.array._str_split(pat, n, expand, regex)

From 14fd864d3b20528479c9093d712621e4ccfe63cd Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Thu, 27 Jun 2024 20:11:41 +0800
Subject: [PATCH 9/9] keep _str_split

---
 pandas/core/strings/object_array.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 91b9e544f72cf..bdcf55e61d2d1 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -339,8 +339,14 @@ def _str_split(
             new_pat: str | re.Pattern
             if regex is True or isinstance(pat, re.Pattern):
                 new_pat = re.compile(pat)
-            else:
+            elif regex is False:
                 new_pat = pat
+            # regex is None so link to old behavior #43563
+            else:
+                if len(pat) == 1:
+                    new_pat = pat
+                else:
+                    new_pat = re.compile(pat)
 
             if isinstance(new_pat, re.Pattern):
                 if n is None or n == -1: