9
9
import numpy as np
10
10
from numpy .random import randint
11
11
12
- from pandas .compat import range , u
12
+ from pandas .compat import range , u , PY3
13
13
import pandas .compat as compat
14
14
from pandas import Index , Series , DataFrame , isna , MultiIndex , notna , concat
15
15
@@ -118,6 +118,55 @@ def any_string_method(request):
118
118
return request .param
119
119
120
120
121
# Parametrization data for the `any_allowed_skipna_inferred_dtype` fixture:
# a list of (expected inferred dtype, values) pairs. This is a subset of the
# full set from pandas/conftest.py.
_any_allowed_skipna_inferred_dtype = [
    ('string', ['a', np.nan, 'c']),
    # expected label for u(...) values depends on the Python version
    ('string' if PY3 else 'unicode', [u('a'), np.nan, u('c')]),
    # expected label for bytes literals depends on the Python version
    ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
    ('empty', [np.nan, np.nan, np.nan]),
    ('empty', []),
    ('mixed-integer', ['a', np.nan, 2])
]
# the first element of every pair (the inferred type) doubles as the test id;
# the second unpacked element (the value lists) is not used
ids, _ = zip(*_any_allowed_skipna_inferred_dtype)
131
+
132
+
133
@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids)
def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for (inferred) dtypes allowed in StringMethods.__init__

    The covered (inferred) types are:
    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed-integer'

    Note that 'mixed' is not part of the parameter list backing this
    fixture, so it is not exercised here.

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    inferred_dtype, values = request.param
    values = np.array(values, dtype=object)  # object dtype to avoid casting

    # correctness of inference tested in tests/dtypes/test_inference.py
    return inferred_dtype, values
168
+
169
+
121
170
class TestStringMethods (object ):
122
171
123
172
def test_api (self ):
@@ -126,11 +175,103 @@ def test_api(self):
126
175
assert Series .str is strings .StringMethods
127
176
assert isinstance (Series (['' ]).str , strings .StringMethods )
128
177
129
- # GH 9184
130
- invalid = Series ([1 ])
131
- with pytest .raises (AttributeError , match = "only use .str accessor" ):
132
- invalid .str
133
- assert not hasattr (invalid , 'str' )
178
@pytest.mark.parametrize('dtype', [object, 'category'])
@pytest.mark.parametrize('box', [Series, Index])
def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype):
    """
    Check whether the `.str` accessor is available per inferred dtype.

    For the inferred dtypes listed in `types_passing_constructor` the
    accessor must be a `strings.StringMethods` instance; for every other
    inferred dtype, accessing `.str` must raise an AttributeError and
    `hasattr(obj, 'str')` must be False.
    """
    # one instance of parametrized fixture
    inferred_dtype, values = any_skipna_inferred_dtype

    # inferred dtypes for which the accessor is expected to be created
    types_passing_constructor = ['string', 'unicode', 'empty',
                                 'bytes', 'mixed', 'mixed-integer']

    t = box(values, dtype=dtype)  # explicit dtype to avoid casting

    is_series = box is Series
    is_index = box is Index

    # TODO: get rid of these xfails
    # (condition, reason) pairs; the first condition that holds wins
    # NOTE(review): in the last entry the 'category' clause is not
    # restricted to Series (`and` binds tighter than `or`); the grouping
    # is kept exactly as originally written -- confirm this is intended
    known_failures = [
        (dtype == 'category' and inferred_dtype in ['period', 'interval'],
         'Conversion to numpy array fails because '
         'the ._values-attribute is not a numpy array for '
         'PeriodArray/IntervalArray; see GH 23553'),
        (is_index and inferred_dtype in ['empty', 'bytes'],
         'Raising too restrictively; '
         'solved by GH 23167'),
        (is_index and dtype == object
         and inferred_dtype in ['boolean', 'date', 'time'],
         'Inferring incorrectly because of NaNs; '
         'solved by GH 23167'),
        ((is_series and dtype == object
          and inferred_dtype not in types_passing_constructor)
         or (dtype == 'category'
             and inferred_dtype in ['decimal', 'boolean', 'time']),
         'Not raising correctly; solved by GH 23167'),
    ]
    for is_failing, why in known_failures:
        if is_failing:
            pytest.xfail(reason=why)

    if inferred_dtype not in types_passing_constructor:
        # GH 9184, GH 23011, GH 23163
        with pytest.raises(AttributeError, match='Can only use .str '
                           'accessor with string values.*'):
            t.str
        assert not hasattr(t, 'str')
        return

    # GH 6106
    assert isinstance(t.str, strings.StringMethods)
217
+
218
@pytest.mark.parametrize('dtype', [object, 'category'])
@pytest.mark.parametrize('box', [Series, Index])
def test_api_per_method(self, box, dtype,
                        any_allowed_skipna_inferred_dtype,
                        any_string_method):
    """
    Check that every `.str` method runs on the supported inferred dtypes.

    This test does not check correctness of the different methods,
    just that the methods work on the specified (inferred) dtypes,
    and raise a TypeError on all others.
    """
    # one instance of each parametrized fixture
    inferred_dtype, values = any_allowed_skipna_inferred_dtype
    method_name, args, kwargs = any_string_method

    # only these methods are expected to work on 'bytes' values
    bytes_allowed = method_name in ['encode', 'decode', 'len']
    # as of v0.23.4, all methods except 'cat' are very lenient with the
    # allowed data types, just returning NaN for entries that error.
    # This could be changed with an 'errors'-kwarg to the `str`-accessor,
    # see discussion in GH 13877
    mixed_allowed = method_name not in ['cat']

    # TODO: get rid of these xfails
    if not bytes_allowed and inferred_dtype == 'bytes':
        pytest.xfail(reason='Not raising for "bytes", see GH 23011; '
                            'Also: malformed method names, see GH 23551; '
                            'solved by GH 23167')
    if (method_name == 'cat'
            and inferred_dtype in ['mixed', 'mixed-integer']):
        pytest.xfail(reason='Bad error message; should raise better; '
                            'solved by GH 23167')
    if box == Index and inferred_dtype in ['empty', 'bytes']:
        pytest.xfail(reason='Raising too restrictively; '
                            'solved by GH 23167')
    if (box == Index and dtype == object
            and inferred_dtype in ['boolean', 'date', 'time']):
        pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                            'solved by GH 23167')
    if box == Index and dtype == 'category':
        pytest.xfail(reason='Broken methods on CategoricalIndex; '
                            'see GH 23556')

    t = box(values, dtype=dtype)  # explicit dtype to avoid casting
    method = getattr(t.str, method_name)

    # inferred dtypes on which `method` is expected to run without raising
    allowed_types = ['string', 'unicode', 'empty']
    if bytes_allowed:
        allowed_types.append('bytes')
    if mixed_allowed:
        allowed_types.extend(['mixed', 'mixed-integer'])

    if inferred_dtype in allowed_types:
        method(*args, **kwargs)  # works!
    else:
        # GH 23011, GH 23163
        msg = ('Cannot use .str.{name} with values of inferred dtype '
               '{inferred_dtype!r}.'.format(name=method_name,
                                            inferred_dtype=inferred_dtype))
        with pytest.raises(TypeError, match=msg):
            method(*args, **kwargs)
134
275
135
276
def test_api_for_categorical (self , any_string_method ):
136
277
# https://github.com/pandas-dev/pandas/issues/10661
0 commit comments