6
6
import numpy as np
7
7
import pytest
8
8
9
- import pandas .util ._test_decorators as td
10
-
11
9
from pandas import (
12
10
DataFrame ,
13
11
Index ,
19
17
import pandas ._testing as tm
20
18
21
19
22
- @pytest .fixture (
23
- params = [
24
- "object" ,
25
- "string" ,
26
- pytest .param (
27
- "arrow_string" , marks = td .skip_if_no ("pyarrow" , min_version = "1.0.0" )
28
- ),
29
- ]
30
- )
31
- def any_string_dtype (request ):
32
- """
33
- Parametrized fixture for string dtypes.
34
- * 'object'
35
- * 'string'
36
- * 'arrow_string'
37
- """
38
- from pandas .core .arrays .string_arrow import ArrowStringDtype # noqa: F401
39
-
40
- return request .param
41
-
42
-
43
20
def assert_series_or_index_equal (left , right ):
44
21
if isinstance (left , Series ):
45
22
tm .assert_series_equal (left , right )
@@ -159,8 +136,14 @@ def test_repeat():
159
136
tm .assert_series_equal (rs , xp )
160
137
161
138
162
- def test_repeat_with_null (nullable_string_dtype ):
139
+ def test_repeat_with_null (nullable_string_dtype , request ):
163
140
# GH: 31632
141
+
142
+ if nullable_string_dtype == "arrow_string" :
143
+ reason = 'Attribute "dtype" are different'
144
+ mark = pytest .mark .xfail (reason = reason )
145
+ request .node .add_marker (mark )
146
+
164
147
ser = Series (["a" , None ], dtype = nullable_string_dtype )
165
148
result = ser .str .repeat ([3 , 4 ])
166
149
expected = Series (["aaa" , None ], dtype = nullable_string_dtype )
@@ -172,15 +155,10 @@ def test_repeat_with_null(nullable_string_dtype):
172
155
tm .assert_series_equal (result , expected )
173
156
174
157
175
- def test_empty_str_methods (any_string_dtype ):
176
- empty_str = empty = Series (dtype = any_string_dtype )
177
- if any_string_dtype == "object" :
178
- empty_int = Series (dtype = "int64" )
179
- empty_bool = Series (dtype = bool )
180
- else :
181
- empty_int = Series (dtype = "Int64" )
182
- empty_bool = Series (dtype = "boolean" )
183
- empty_object = Series (dtype = object )
158
+ def test_empty_str_methods ():
159
+ empty_str = empty = Series (dtype = object )
160
+ empty_int = Series (dtype = "int64" )
161
+ empty_bool = Series (dtype = bool )
184
162
empty_bytes = Series (dtype = object )
185
163
186
164
# GH7241
@@ -212,23 +190,23 @@ def test_empty_str_methods(any_string_dtype):
212
190
tm .assert_frame_equal (DataFrame (dtype = str ), empty .str .get_dummies ())
213
191
tm .assert_series_equal (empty_str , empty_str .str .join ("" ))
214
192
tm .assert_series_equal (empty_int , empty .str .len ())
215
- tm .assert_series_equal (empty_object , empty_str .str .findall ("a" ))
193
+ tm .assert_series_equal (empty_str , empty_str .str .findall ("a" ))
216
194
tm .assert_series_equal (empty_int , empty .str .find ("a" ))
217
195
tm .assert_series_equal (empty_int , empty .str .rfind ("a" ))
218
196
tm .assert_series_equal (empty_str , empty .str .pad (42 ))
219
197
tm .assert_series_equal (empty_str , empty .str .center (42 ))
220
- tm .assert_series_equal (empty_object , empty .str .split ("a" ))
221
- tm .assert_series_equal (empty_object , empty .str .rsplit ("a" ))
222
- tm .assert_series_equal (empty_object , empty .str .partition ("a" , expand = False ))
223
- tm .assert_series_equal (empty_object , empty .str .rpartition ("a" , expand = False ))
198
+ tm .assert_series_equal (empty_str , empty .str .split ("a" ))
199
+ tm .assert_series_equal (empty_str , empty .str .rsplit ("a" ))
200
+ tm .assert_series_equal (empty_str , empty .str .partition ("a" , expand = False ))
201
+ tm .assert_series_equal (empty_str , empty .str .rpartition ("a" , expand = False ))
224
202
tm .assert_series_equal (empty_str , empty .str .slice (stop = 1 ))
225
203
tm .assert_series_equal (empty_str , empty .str .slice (step = 1 ))
226
204
tm .assert_series_equal (empty_str , empty .str .strip ())
227
205
tm .assert_series_equal (empty_str , empty .str .lstrip ())
228
206
tm .assert_series_equal (empty_str , empty .str .rstrip ())
229
207
tm .assert_series_equal (empty_str , empty .str .wrap (42 ))
230
208
tm .assert_series_equal (empty_str , empty .str .get (0 ))
231
- tm .assert_series_equal (empty_object , empty_bytes .str .decode ("ascii" ))
209
+ tm .assert_series_equal (empty_str , empty_bytes .str .decode ("ascii" ))
232
210
tm .assert_series_equal (empty_bytes , empty .str .encode ("ascii" ))
233
211
# ismethods should always return boolean (GH 29624)
234
212
tm .assert_series_equal (empty_bool , empty .str .isalnum ())
@@ -255,9 +233,9 @@ def test_empty_str_methods_to_frame():
255
233
tm .assert_frame_equal (empty_df , empty .str .rpartition ("a" ))
256
234
257
235
258
- def test_ismethods (any_string_dtype ):
236
+ def test_ismethods ():
259
237
values = ["A" , "b" , "Xy" , "4" , "3A" , "" , "TT" , "55" , "-" , " " ]
260
- str_s = Series (values , dtype = any_string_dtype )
238
+ str_s = Series (values )
261
239
alnum_e = [True , True , True , True , True , False , True , True , False , False ]
262
240
alpha_e = [True , True , True , False , False , False , True , False , False , False ]
263
241
digit_e = [False , False , False , True , False , False , False , True , False , False ]
@@ -281,14 +259,13 @@ def test_ismethods(any_string_dtype):
281
259
upper_e = [True , False , False , False , True , False , True , False , False , False ]
282
260
title_e = [True , False , True , False , True , False , False , False , False , False ]
283
261
284
- dtype = "bool" if any_string_dtype == "object" else "boolean"
285
- tm .assert_series_equal (str_s .str .isalnum (), Series (alnum_e , dtype = dtype ))
286
- tm .assert_series_equal (str_s .str .isalpha (), Series (alpha_e , dtype = dtype ))
287
- tm .assert_series_equal (str_s .str .isdigit (), Series (digit_e , dtype = dtype ))
288
- tm .assert_series_equal (str_s .str .isspace (), Series (space_e , dtype = dtype ))
289
- tm .assert_series_equal (str_s .str .islower (), Series (lower_e , dtype = dtype ))
290
- tm .assert_series_equal (str_s .str .isupper (), Series (upper_e , dtype = dtype ))
291
- tm .assert_series_equal (str_s .str .istitle (), Series (title_e , dtype = dtype ))
262
+ tm .assert_series_equal (str_s .str .isalnum (), Series (alnum_e ))
263
+ tm .assert_series_equal (str_s .str .isalpha (), Series (alpha_e ))
264
+ tm .assert_series_equal (str_s .str .isdigit (), Series (digit_e ))
265
+ tm .assert_series_equal (str_s .str .isspace (), Series (space_e ))
266
+ tm .assert_series_equal (str_s .str .islower (), Series (lower_e ))
267
+ tm .assert_series_equal (str_s .str .isupper (), Series (upper_e ))
268
+ tm .assert_series_equal (str_s .str .istitle (), Series (title_e ))
292
269
293
270
assert str_s .str .isalnum ().tolist () == [v .isalnum () for v in values ]
294
271
assert str_s .str .isalpha ().tolist () == [v .isalpha () for v in values ]
@@ -299,30 +276,28 @@ def test_ismethods(any_string_dtype):
299
276
assert str_s .str .istitle ().tolist () == [v .istitle () for v in values ]
300
277
301
278
302
- def test_isnumeric (any_string_dtype ):
279
+ def test_isnumeric ():
303
280
# 0x00bc: ¼ VULGAR FRACTION ONE QUARTER
304
281
# 0x2605: ★ not number
305
282
# 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
306
283
# 0xFF13: ３ FULLWIDTH DIGIT THREE
307
284
values = ["A" , "3" , "¼" , "★" , "፸" , "3" , "four" ]
308
- s = Series (values , dtype = any_string_dtype )
285
+ s = Series (values )
309
286
numeric_e = [False , True , True , False , True , True , False ]
310
287
decimal_e = [False , True , False , False , False , True , False ]
311
- dtype = "bool" if any_string_dtype == "object" else "boolean"
312
- tm .assert_series_equal (s .str .isnumeric (), Series (numeric_e , dtype = dtype ))
313
- tm .assert_series_equal (s .str .isdecimal (), Series (decimal_e , dtype = dtype ))
288
+ tm .assert_series_equal (s .str .isnumeric (), Series (numeric_e ))
289
+ tm .assert_series_equal (s .str .isdecimal (), Series (decimal_e ))
314
290
315
291
unicodes = ["A" , "3" , "¼" , "★" , "፸" , "3" , "four" ]
316
292
assert s .str .isnumeric ().tolist () == [v .isnumeric () for v in unicodes ]
317
293
assert s .str .isdecimal ().tolist () == [v .isdecimal () for v in unicodes ]
318
294
319
295
values = ["A" , np .nan , "¼" , "★" , np .nan , "3" , "four" ]
320
- s = Series (values , dtype = any_string_dtype )
296
+ s = Series (values )
321
297
numeric_e = [False , np .nan , True , False , np .nan , True , False ]
322
298
decimal_e = [False , np .nan , False , False , np .nan , True , False ]
323
- dtype = "object" if any_string_dtype == "object" else "boolean"
324
- tm .assert_series_equal (s .str .isnumeric (), Series (numeric_e , dtype = dtype ))
325
- tm .assert_series_equal (s .str .isdecimal (), Series (decimal_e , dtype = dtype ))
299
+ tm .assert_series_equal (s .str .isnumeric (), Series (numeric_e ))
300
+ tm .assert_series_equal (s .str .isdecimal (), Series (decimal_e ))
326
301
327
302
328
303
def test_get_dummies ():
@@ -570,19 +545,19 @@ def test_slice_replace():
570
545
tm .assert_series_equal (result , exp )
571
546
572
547
573
- def test_strip_lstrip_rstrip (any_string_dtype ):
574
- values = Series ([" aa " , " bb \n " , np .nan , "cc " ], dtype = any_string_dtype )
548
+ def test_strip_lstrip_rstrip ():
549
+ values = Series ([" aa " , " bb \n " , np .nan , "cc " ])
575
550
576
551
result = values .str .strip ()
577
- exp = Series (["aa" , "bb" , np .nan , "cc" ], dtype = any_string_dtype )
552
+ exp = Series (["aa" , "bb" , np .nan , "cc" ])
578
553
tm .assert_series_equal (result , exp )
579
554
580
555
result = values .str .lstrip ()
581
- exp = Series (["aa " , "bb \n " , np .nan , "cc " ], dtype = any_string_dtype )
556
+ exp = Series (["aa " , "bb \n " , np .nan , "cc " ])
582
557
tm .assert_series_equal (result , exp )
583
558
584
559
result = values .str .rstrip ()
585
- exp = Series ([" aa" , " bb" , np .nan , "cc" ], dtype = any_string_dtype )
560
+ exp = Series ([" aa" , " bb" , np .nan , "cc" ])
586
561
tm .assert_series_equal (result , exp )
587
562
588
563
@@ -609,19 +584,19 @@ def test_strip_lstrip_rstrip_mixed():
609
584
tm .assert_almost_equal (rs , xp )
610
585
611
586
612
- def test_strip_lstrip_rstrip_args (any_string_dtype ):
613
- values = Series (["xxABCxx" , "xx BNSD" , "LDFJH xx" ], dtype = any_string_dtype )
587
+ def test_strip_lstrip_rstrip_args ():
588
+ values = Series (["xxABCxx" , "xx BNSD" , "LDFJH xx" ])
614
589
615
590
rs = values .str .strip ("x" )
616
- xp = Series (["ABC" , " BNSD" , "LDFJH " ], dtype = any_string_dtype )
591
+ xp = Series (["ABC" , " BNSD" , "LDFJH " ])
617
592
tm .assert_series_equal (rs , xp )
618
593
619
594
rs = values .str .lstrip ("x" )
620
- xp = Series (["ABCxx" , " BNSD" , "LDFJH xx" ], dtype = any_string_dtype )
595
+ xp = Series (["ABCxx" , " BNSD" , "LDFJH xx" ])
621
596
tm .assert_series_equal (rs , xp )
622
597
623
598
rs = values .str .rstrip ("x" )
624
- xp = Series (["xxABC" , "xx BNSD" , "LDFJH " ], dtype = any_string_dtype )
599
+ xp = Series (["xxABC" , "xx BNSD" , "LDFJH " ])
625
600
tm .assert_series_equal (rs , xp )
626
601
627
602
0 commit comments