|
17 | 17 | object_pyarrow_numpy,
|
18 | 18 | )
|
19 | 19 |
|
| 20 | +pa = pytest.importorskip("pyarrow") |
| 21 | + |
| 22 | +from pandas.core.arrays.arrow.array import ArrowExtensionArray |
| 23 | + |
20 | 24 |
|
21 | 25 | @pytest.mark.parametrize("method", ["split", "rsplit"])
|
22 | 26 | def test_split(any_string_dtype, method):
|
@@ -59,27 +63,39 @@ def test_split_regex(any_string_dtype):
|
59 | 63 | tm.assert_series_equal(result, exp)
|
60 | 64 |
|
61 | 65 |
|
62 |
| -def test_split_regex_explicit(any_string_dtype): |
| 66 | +def test_split_regex_explicit(any_string_dtype_2): |
63 | 67 | # explicit regex = True split with compiled regex
|
64 | 68 | regex_pat = re.compile(r".jpg")
|
65 |
| - values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype) |
66 |
| - result = values.str.split(regex_pat) |
67 |
| - exp = Series([["xx", "zzz", ""]]) |
68 |
| - tm.assert_series_equal(result, exp) |
| 69 | + values = Series("xxxjpgzzz.jpg", dtype=any_string_dtype_2) |
| 70 | + |
| 71 | + if not isinstance(any_string_dtype_2, pd.ArrowDtype): |
| 72 | + # ArrowDtype does not support compiled regex |
| 73 | + result = values.str.split(regex_pat) |
| 74 | + exp = Series([["xx", "zzz", ""]]) |
| 75 | + tm.assert_series_equal(result, exp) |
69 | 76 |
|
70 | 77 | # explicit regex = False split
|
71 | 78 | result = values.str.split(r"\.jpg", regex=False)
|
72 |
| - exp = Series([["xxxjpgzzz.jpg"]]) |
| 79 | + if not isinstance(any_string_dtype_2, pd.ArrowDtype): |
| 80 | + exp = Series([["xxxjpgzzz.jpg"]]) |
| 81 | + else: |
| 82 | + exp = Series(ArrowExtensionArray(pa.array([["xxxjpgzzz.jpg"]]))) |
73 | 83 | tm.assert_series_equal(result, exp)
|
74 | 84 |
|
75 | 85 | # non explicit regex split, pattern length == 1
|
76 | 86 | result = values.str.split(r".")
|
77 |
| - exp = Series([["xxxjpgzzz", "jpg"]]) |
| 87 | + if not isinstance(any_string_dtype_2, pd.ArrowDtype): |
| 88 | + exp = Series([["xxxjpgzzz", "jpg"]]) |
| 89 | + else: |
| 90 | + exp = Series(ArrowExtensionArray(pa.array([["xxxjpgzzz", "jpg"]]))) |
78 | 91 | tm.assert_series_equal(result, exp)
|
79 | 92 |
|
80 | 93 | # non explicit regex split, pattern length != 1
|
81 | 94 | result = values.str.split(r".jpg")
|
82 |
| - exp = Series([["xx", "zzz", ""]]) |
| 95 | + if not isinstance(any_string_dtype_2, pd.ArrowDtype): |
| 96 | + exp = Series([["xx", "zzz", ""]]) |
| 97 | + else: |
| 98 | + exp = Series(ArrowExtensionArray(pa.array([["xx", "zzz", ""]]))) |
83 | 99 | tm.assert_series_equal(result, exp)
|
84 | 100 |
|
85 | 101 | # regex=False with pattern compiled regex raises error
|
|
0 commit comments