Skip to content

Commit 0c0f924

Browse files
simonjayhawkins authored and yeshsurya committed
[ArrowStringArray] TST: more parameterised testing - part 4 (pandas-dev#40963)
1 parent 630832b commit 0c0f924

File tree

1 file changed

+43
-68
lines changed

1 file changed

+43
-68
lines changed

pandas/tests/strings/test_strings.py

+43 -68
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
import pandas.util._test_decorators as td
10-
119
from pandas import (
1210
DataFrame,
1311
Index,
@@ -19,27 +17,6 @@
1917
import pandas._testing as tm
2018

2119

22-
@pytest.fixture(
23-
params=[
24-
"object",
25-
"string",
26-
pytest.param(
27-
"arrow_string", marks=td.skip_if_no("pyarrow", min_version="1.0.0")
28-
),
29-
]
30-
)
31-
def any_string_dtype(request):
32-
"""
33-
Parametrized fixture for string dtypes.
34-
* 'object'
35-
* 'string'
36-
* 'arrow_string'
37-
"""
38-
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
39-
40-
return request.param
41-
42-
4320
def assert_series_or_index_equal(left, right):
4421
if isinstance(left, Series):
4522
tm.assert_series_equal(left, right)
@@ -159,8 +136,14 @@ def test_repeat():
159136
tm.assert_series_equal(rs, xp)
160137

161138

162-
def test_repeat_with_null(nullable_string_dtype):
139+
def test_repeat_with_null(nullable_string_dtype, request):
163140
# GH: 31632
141+
142+
if nullable_string_dtype == "arrow_string":
143+
reason = 'Attribute "dtype" are different'
144+
mark = pytest.mark.xfail(reason=reason)
145+
request.node.add_marker(mark)
146+
164147
ser = Series(["a", None], dtype=nullable_string_dtype)
165148
result = ser.str.repeat([3, 4])
166149
expected = Series(["aaa", None], dtype=nullable_string_dtype)
@@ -172,15 +155,10 @@ def test_repeat_with_null(nullable_string_dtype):
172155
tm.assert_series_equal(result, expected)
173156

174157

175-
def test_empty_str_methods(any_string_dtype):
176-
empty_str = empty = Series(dtype=any_string_dtype)
177-
if any_string_dtype == "object":
178-
empty_int = Series(dtype="int64")
179-
empty_bool = Series(dtype=bool)
180-
else:
181-
empty_int = Series(dtype="Int64")
182-
empty_bool = Series(dtype="boolean")
183-
empty_object = Series(dtype=object)
158+
def test_empty_str_methods():
159+
empty_str = empty = Series(dtype=object)
160+
empty_int = Series(dtype="int64")
161+
empty_bool = Series(dtype=bool)
184162
empty_bytes = Series(dtype=object)
185163

186164
# GH7241
@@ -212,23 +190,23 @@ def test_empty_str_methods(any_string_dtype):
212190
tm.assert_frame_equal(DataFrame(dtype=str), empty.str.get_dummies())
213191
tm.assert_series_equal(empty_str, empty_str.str.join(""))
214192
tm.assert_series_equal(empty_int, empty.str.len())
215-
tm.assert_series_equal(empty_object, empty_str.str.findall("a"))
193+
tm.assert_series_equal(empty_str, empty_str.str.findall("a"))
216194
tm.assert_series_equal(empty_int, empty.str.find("a"))
217195
tm.assert_series_equal(empty_int, empty.str.rfind("a"))
218196
tm.assert_series_equal(empty_str, empty.str.pad(42))
219197
tm.assert_series_equal(empty_str, empty.str.center(42))
220-
tm.assert_series_equal(empty_object, empty.str.split("a"))
221-
tm.assert_series_equal(empty_object, empty.str.rsplit("a"))
222-
tm.assert_series_equal(empty_object, empty.str.partition("a", expand=False))
223-
tm.assert_series_equal(empty_object, empty.str.rpartition("a", expand=False))
198+
tm.assert_series_equal(empty_str, empty.str.split("a"))
199+
tm.assert_series_equal(empty_str, empty.str.rsplit("a"))
200+
tm.assert_series_equal(empty_str, empty.str.partition("a", expand=False))
201+
tm.assert_series_equal(empty_str, empty.str.rpartition("a", expand=False))
224202
tm.assert_series_equal(empty_str, empty.str.slice(stop=1))
225203
tm.assert_series_equal(empty_str, empty.str.slice(step=1))
226204
tm.assert_series_equal(empty_str, empty.str.strip())
227205
tm.assert_series_equal(empty_str, empty.str.lstrip())
228206
tm.assert_series_equal(empty_str, empty.str.rstrip())
229207
tm.assert_series_equal(empty_str, empty.str.wrap(42))
230208
tm.assert_series_equal(empty_str, empty.str.get(0))
231-
tm.assert_series_equal(empty_object, empty_bytes.str.decode("ascii"))
209+
tm.assert_series_equal(empty_str, empty_bytes.str.decode("ascii"))
232210
tm.assert_series_equal(empty_bytes, empty.str.encode("ascii"))
233211
# ismethods should always return boolean (GH 29624)
234212
tm.assert_series_equal(empty_bool, empty.str.isalnum())
@@ -255,9 +233,9 @@ def test_empty_str_methods_to_frame():
255233
tm.assert_frame_equal(empty_df, empty.str.rpartition("a"))
256234

257235

258-
def test_ismethods(any_string_dtype):
236+
def test_ismethods():
259237
values = ["A", "b", "Xy", "4", "3A", "", "TT", "55", "-", " "]
260-
str_s = Series(values, dtype=any_string_dtype)
238+
str_s = Series(values)
261239
alnum_e = [True, True, True, True, True, False, True, True, False, False]
262240
alpha_e = [True, True, True, False, False, False, True, False, False, False]
263241
digit_e = [False, False, False, True, False, False, False, True, False, False]
@@ -281,14 +259,13 @@ def test_ismethods(any_string_dtype):
281259
upper_e = [True, False, False, False, True, False, True, False, False, False]
282260
title_e = [True, False, True, False, True, False, False, False, False, False]
283261

284-
dtype = "bool" if any_string_dtype == "object" else "boolean"
285-
tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e, dtype=dtype))
286-
tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e, dtype=dtype))
287-
tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e, dtype=dtype))
288-
tm.assert_series_equal(str_s.str.isspace(), Series(space_e, dtype=dtype))
289-
tm.assert_series_equal(str_s.str.islower(), Series(lower_e, dtype=dtype))
290-
tm.assert_series_equal(str_s.str.isupper(), Series(upper_e, dtype=dtype))
291-
tm.assert_series_equal(str_s.str.istitle(), Series(title_e, dtype=dtype))
262+
tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e))
263+
tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e))
264+
tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e))
265+
tm.assert_series_equal(str_s.str.isspace(), Series(space_e))
266+
tm.assert_series_equal(str_s.str.islower(), Series(lower_e))
267+
tm.assert_series_equal(str_s.str.isupper(), Series(upper_e))
268+
tm.assert_series_equal(str_s.str.istitle(), Series(title_e))
292269

293270
assert str_s.str.isalnum().tolist() == [v.isalnum() for v in values]
294271
assert str_s.str.isalpha().tolist() == [v.isalpha() for v in values]
@@ -299,30 +276,28 @@ def test_ismethods(any_string_dtype):
299276
assert str_s.str.istitle().tolist() == [v.istitle() for v in values]
300277

301278

302-
def test_isnumeric(any_string_dtype):
279+
def test_isnumeric():
303280
# 0x00bc: ¼ VULGAR FRACTION ONE QUARTER
304281
# 0x2605: ★ not number
305282
# 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
306283
# 0xFF13: ３ Em 3
307284
values = ["A", "3", "¼", "★", "፸", "3", "four"]
308-
s = Series(values, dtype=any_string_dtype)
285+
s = Series(values)
309286
numeric_e = [False, True, True, False, True, True, False]
310287
decimal_e = [False, True, False, False, False, True, False]
311-
dtype = "bool" if any_string_dtype == "object" else "boolean"
312-
tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e, dtype=dtype))
313-
tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e, dtype=dtype))
288+
tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e))
289+
tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e))
314290

315291
unicodes = ["A", "3", "¼", "★", "፸", "3", "four"]
316292
assert s.str.isnumeric().tolist() == [v.isnumeric() for v in unicodes]
317293
assert s.str.isdecimal().tolist() == [v.isdecimal() for v in unicodes]
318294

319295
values = ["A", np.nan, "¼", "★", np.nan, "3", "four"]
320-
s = Series(values, dtype=any_string_dtype)
296+
s = Series(values)
321297
numeric_e = [False, np.nan, True, False, np.nan, True, False]
322298
decimal_e = [False, np.nan, False, False, np.nan, True, False]
323-
dtype = "object" if any_string_dtype == "object" else "boolean"
324-
tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e, dtype=dtype))
325-
tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e, dtype=dtype))
299+
tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e))
300+
tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e))
326301

327302

328303
def test_get_dummies():
@@ -570,19 +545,19 @@ def test_slice_replace():
570545
tm.assert_series_equal(result, exp)
571546

572547

573-
def test_strip_lstrip_rstrip(any_string_dtype):
574-
values = Series([" aa ", " bb \n", np.nan, "cc "], dtype=any_string_dtype)
548+
def test_strip_lstrip_rstrip():
549+
values = Series([" aa ", " bb \n", np.nan, "cc "])
575550

576551
result = values.str.strip()
577-
exp = Series(["aa", "bb", np.nan, "cc"], dtype=any_string_dtype)
552+
exp = Series(["aa", "bb", np.nan, "cc"])
578553
tm.assert_series_equal(result, exp)
579554

580555
result = values.str.lstrip()
581-
exp = Series(["aa ", "bb \n", np.nan, "cc "], dtype=any_string_dtype)
556+
exp = Series(["aa ", "bb \n", np.nan, "cc "])
582557
tm.assert_series_equal(result, exp)
583558

584559
result = values.str.rstrip()
585-
exp = Series([" aa", " bb", np.nan, "cc"], dtype=any_string_dtype)
560+
exp = Series([" aa", " bb", np.nan, "cc"])
586561
tm.assert_series_equal(result, exp)
587562

588563

@@ -609,19 +584,19 @@ def test_strip_lstrip_rstrip_mixed():
609584
tm.assert_almost_equal(rs, xp)
610585

611586

612-
def test_strip_lstrip_rstrip_args(any_string_dtype):
613-
values = Series(["xxABCxx", "xx BNSD", "LDFJH xx"], dtype=any_string_dtype)
587+
def test_strip_lstrip_rstrip_args():
588+
values = Series(["xxABCxx", "xx BNSD", "LDFJH xx"])
614589

615590
rs = values.str.strip("x")
616-
xp = Series(["ABC", " BNSD", "LDFJH "], dtype=any_string_dtype)
591+
xp = Series(["ABC", " BNSD", "LDFJH "])
617592
tm.assert_series_equal(rs, xp)
618593

619594
rs = values.str.lstrip("x")
620-
xp = Series(["ABCxx", " BNSD", "LDFJH xx"], dtype=any_string_dtype)
595+
xp = Series(["ABCxx", " BNSD", "LDFJH xx"])
621596
tm.assert_series_equal(rs, xp)
622597

623598
rs = values.str.rstrip("x")
624-
xp = Series(["xxABC", "xx BNSD", "LDFJH "], dtype=any_string_dtype)
599+
xp = Series(["xxABC", "xx BNSD", "LDFJH "])
625600
tm.assert_series_equal(rs, xp)
626601

627602

0 commit comments

Comments
 (0)