3
3
import numpy as np
4
4
import pytest
5
5
6
+ import pandas .util ._test_decorators as td
7
+
6
8
from pandas import (
7
9
DataFrame ,
8
10
Index ,
9
11
MultiIndex ,
10
12
Series ,
11
13
_testing as tm ,
12
14
concat ,
15
+ option_context ,
13
16
)
14
17
15
18
@@ -26,45 +29,49 @@ def test_str_cat_name(index_or_series, other):
26
29
assert result .name == "name"
27
30
28
31
29
- def test_str_cat (index_or_series ):
30
- box = index_or_series
31
- # test_cat above tests "str_cat" from ndarray;
32
- # here testing "str.cat" from Series/Index to ndarray/list
33
- s = box (["a" , "a" , "b" , "b" , "c" , np .nan ])
32
+ @pytest .mark .parametrize (
33
+ "infer_string" , [False , pytest .param (True , marks = td .skip_if_no ("pyarrow" ))]
34
+ )
35
+ def test_str_cat (index_or_series , infer_string ):
36
+ with option_context ("future.infer_string" , infer_string ):
37
+ box = index_or_series
38
+ # test_cat above tests "str_cat" from ndarray;
39
+ # here testing "str.cat" from Series/Index to ndarray/list
40
+ s = box (["a" , "a" , "b" , "b" , "c" , np .nan ])
34
41
35
- # single array
36
- result = s .str .cat ()
37
- expected = "aabbc"
38
- assert result == expected
42
+ # single array
43
+ result = s .str .cat ()
44
+ expected = "aabbc"
45
+ assert result == expected
39
46
40
- result = s .str .cat (na_rep = "-" )
41
- expected = "aabbc-"
42
- assert result == expected
47
+ result = s .str .cat (na_rep = "-" )
48
+ expected = "aabbc-"
49
+ assert result == expected
43
50
44
- result = s .str .cat (sep = "_" , na_rep = "NA" )
45
- expected = "a_a_b_b_c_NA"
46
- assert result == expected
51
+ result = s .str .cat (sep = "_" , na_rep = "NA" )
52
+ expected = "a_a_b_b_c_NA"
53
+ assert result == expected
47
54
48
- t = np .array (["a" , np .nan , "b" , "d" , "foo" , np .nan ], dtype = object )
49
- expected = box (["aa" , "a-" , "bb" , "bd" , "cfoo" , "--" ])
55
+ t = np .array (["a" , np .nan , "b" , "d" , "foo" , np .nan ], dtype = object )
56
+ expected = box (["aa" , "a-" , "bb" , "bd" , "cfoo" , "--" ])
50
57
51
- # Series/Index with array
52
- result = s .str .cat (t , na_rep = "-" )
53
- tm .assert_equal (result , expected )
58
+ # Series/Index with array
59
+ result = s .str .cat (t , na_rep = "-" )
60
+ tm .assert_equal (result , expected )
54
61
55
- # Series/Index with list
56
- result = s .str .cat (list (t ), na_rep = "-" )
57
- tm .assert_equal (result , expected )
62
+ # Series/Index with list
63
+ result = s .str .cat (list (t ), na_rep = "-" )
64
+ tm .assert_equal (result , expected )
58
65
59
- # errors for incorrect lengths
60
- rgx = r"If `others` contains arrays or lists \(or other list-likes.*"
61
- z = Series (["1" , "2" , "3" ])
66
+ # errors for incorrect lengths
67
+ rgx = r"If `others` contains arrays or lists \(or other list-likes.*"
68
+ z = Series (["1" , "2" , "3" ])
62
69
63
- with pytest .raises (ValueError , match = rgx ):
64
- s .str .cat (z .values )
70
+ with pytest .raises (ValueError , match = rgx ):
71
+ s .str .cat (z .values )
65
72
66
- with pytest .raises (ValueError , match = rgx ):
67
- s .str .cat (list (z ))
73
+ with pytest .raises (ValueError , match = rgx ):
74
+ s .str .cat (list (z ))
68
75
69
76
70
77
def test_str_cat_raises_intuitive_error (index_or_series ):
@@ -78,39 +85,54 @@ def test_str_cat_raises_intuitive_error(index_or_series):
78
85
s .str .cat (" " )
79
86
80
87
88
+ @pytest .mark .parametrize (
89
+ "infer_string" , [False , pytest .param (True , marks = td .skip_if_no ("pyarrow" ))]
90
+ )
81
91
@pytest .mark .parametrize ("sep" , ["" , None ])
82
92
@pytest .mark .parametrize ("dtype_target" , ["object" , "category" ])
83
93
@pytest .mark .parametrize ("dtype_caller" , ["object" , "category" ])
84
- def test_str_cat_categorical (index_or_series , dtype_caller , dtype_target , sep ):
94
+ def test_str_cat_categorical (
95
+ index_or_series , dtype_caller , dtype_target , sep , infer_string
96
+ ):
85
97
box = index_or_series
86
98
87
- s = Index (["a" , "a" , "b" , "a" ], dtype = dtype_caller )
88
- s = s if box == Index else Series (s , index = s )
89
- t = Index (["b" , "a" , "b" , "c" ], dtype = dtype_target )
90
-
91
- expected = Index (["ab" , "aa" , "bb" , "ac" ])
92
- expected = expected if box == Index else Series (expected , index = s )
99
+ with option_context ("future.infer_string" , infer_string ):
100
+ s = Index (["a" , "a" , "b" , "a" ], dtype = dtype_caller )
101
+ s = s if box == Index else Series (s , index = s )
102
+ t = Index (["b" , "a" , "b" , "c" ], dtype = dtype_target )
93
103
94
- # Series/Index with unaligned Index -> t.values
95
- result = s .str .cat (t .values , sep = sep )
96
- tm .assert_equal (result , expected )
97
-
98
- # Series/Index with Series having matching Index
99
- t = Series (t .values , index = s )
100
- result = s .str .cat (t , sep = sep )
101
- tm .assert_equal (result , expected )
102
-
103
- # Series/Index with Series.values
104
- result = s .str .cat (t .values , sep = sep )
105
- tm .assert_equal (result , expected )
104
+ expected = Index (["ab" , "aa" , "bb" , "ac" ])
105
+ expected = (
106
+ expected
107
+ if box == Index
108
+ else Series (expected , index = Index (s , dtype = dtype_caller ))
109
+ )
106
110
107
- # Series/Index with Series having different Index
108
- t = Series (t .values , index = t .values )
109
- expected = Index (["aa" , "aa" , "bb" , "bb" , "aa" ])
110
- expected = expected if box == Index else Series (expected , index = expected .str [:1 ])
111
+ # Series/Index with unaligned Index -> t.values
112
+ result = s .str .cat (t .values , sep = sep )
113
+ tm .assert_equal (result , expected )
114
+
115
+ # Series/Index with Series having matching Index
116
+ t = Series (t .values , index = Index (s , dtype = dtype_caller ))
117
+ result = s .str .cat (t , sep = sep )
118
+ tm .assert_equal (result , expected )
119
+
120
+ # Series/Index with Series.values
121
+ result = s .str .cat (t .values , sep = sep )
122
+ tm .assert_equal (result , expected )
123
+
124
+ # Series/Index with Series having different Index
125
+ t = Series (t .values , index = t .values )
126
+ expected = Index (["aa" , "aa" , "bb" , "bb" , "aa" ])
127
+ dtype = object if dtype_caller == "object" else s .dtype .categories .dtype
128
+ expected = (
129
+ expected
130
+ if box == Index
131
+ else Series (expected , index = Index (expected .str [:1 ], dtype = dtype ))
132
+ )
111
133
112
- result = s .str .cat (t , sep = sep )
113
- tm .assert_equal (result , expected )
134
+ result = s .str .cat (t , sep = sep )
135
+ tm .assert_equal (result , expected )
114
136
115
137
116
138
@pytest .mark .parametrize (
@@ -321,8 +343,9 @@ def test_str_cat_all_na(index_or_series, index_or_series2):
321
343
322
344
# all-NA target
323
345
if box == Series :
324
- expected = Series ([np .nan ] * 4 , index = s .index , dtype = object )
346
+ expected = Series ([np .nan ] * 4 , index = s .index , dtype = s . dtype )
325
347
else : # box == Index
348
+ # TODO: String option, this should return string dtype
326
349
expected = Index ([np .nan ] * 4 , dtype = object )
327
350
result = s .str .cat (t , join = "left" )
328
351
tm .assert_equal (result , expected )
0 commit comments