4
4
import numpy as np
5
5
import pytest
6
6
7
+ from pandas ._config import using_string_dtype
8
+
9
+ from pandas .compat import HAS_PYARROW
7
10
from pandas .errors import PerformanceWarning
8
11
import pandas .util ._test_decorators as td
9
12
@@ -167,7 +170,16 @@ def test_contains_na_kwarg_for_nullable_string_dtype(
167
170
# https://github.com/pandas-dev/pandas/pull/41025#issuecomment-824062416
168
171
169
172
values = Series (["a" , "b" , "c" , "a" , np .nan ], dtype = nullable_string_dtype )
170
- result = values .str .contains ("a" , na = na , regex = regex )
173
+
174
+ msg = (
175
+ "Allowing a non-bool 'na' in obj.str.contains is deprecated and "
176
+ "will raise in a future version"
177
+ )
178
+ warn = None
179
+ if not pd .isna (na ) and not isinstance (na , bool ):
180
+ warn = FutureWarning
181
+ with tm .assert_produces_warning (warn , match = msg ):
182
+ result = values .str .contains ("a" , na = na , regex = regex )
171
183
expected = Series ([True , False , False , True , expected ], dtype = "boolean" )
172
184
tm .assert_series_equal (result , expected )
173
185
@@ -233,6 +245,7 @@ def test_contains_nan(any_string_dtype):
233
245
expected = Series ([True , True , True ], dtype = expected_dtype )
234
246
tm .assert_series_equal (result , expected )
235
247
248
+ # TODO(infer_string)
236
249
# this particular combination of events is broken on 2.3
237
250
# would require cherry picking #58483, which in turn requires #57481
238
251
# which introduce many behavioral changes
@@ -241,14 +254,19 @@ def test_contains_nan(any_string_dtype):
241
254
and any_string_dtype .storage == "python"
242
255
and any_string_dtype .na_value is np .nan
243
256
):
244
- result = s .str .contains ("foo" , na = "foo" )
257
+ msg = (
258
+ "Allowing a non-bool 'na' in obj.str.contains is deprecated and "
259
+ "will raise in a future version"
260
+ )
261
+ with tm .assert_produces_warning (FutureWarning , match = msg ):
262
+ result = s .str .contains ("foo" , na = "foo" )
245
263
if any_string_dtype == "object" :
246
264
expected = Series (["foo" , "foo" , "foo" ], dtype = np .object_ )
247
265
elif any_string_dtype .na_value is np .nan :
248
266
expected = Series ([True , True , True ], dtype = np .bool_ )
249
267
else :
250
268
expected = Series ([True , True , True ], dtype = "boolean" )
251
- tm .assert_series_equal (result , expected )
269
+ tm .assert_series_equal (result , expected )
252
270
253
271
result = s .str .contains ("foo" )
254
272
expected_dtype = (
@@ -263,6 +281,37 @@ def test_contains_nan(any_string_dtype):
263
281
# --------------------------------------------------------------------------------------
264
282
265
283
284
@pytest.mark.xfail(
    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
)
def test_startswith_endswith_validate_na(any_string_dtype):
    """Check how ``str.startswith``/``str.endswith`` react to a non-bool ``na``.

    For object dtype and python-storage ``StringDtype`` the call is expected
    to emit a ``FutureWarning`` about the deprecated non-bool ``na``.  For
    every other dtype supplied by the fixture the call is expected to raise
    ``pyarrow.lib.ArrowInvalid``.

    The test is marked as a non-strict xfail when ``using_string_dtype()`` is
    true and pyarrow is not available.
    """
    # GH#59615
    values = ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]
    ser = Series(values, dtype=any_string_dtype)

    ser_dtype = ser.dtype
    warns_on_non_bool_na = (
        isinstance(ser_dtype, pd.StringDtype) and ser_dtype.storage == "python"
    ) or ser_dtype == np.dtype("object")

    if not warns_on_non_bool_na:
        # TODO(infer_string): don't surface pyarrow errors
        import pyarrow as pa

        arrow_msg = (
            "Could not convert 'baz' with type str: tried to convert to boolean"
        )
        with pytest.raises(pa.lib.ArrowInvalid, match=arrow_msg):
            ser.str.startswith("kapow", na="baz")
        with pytest.raises(pa.lib.ArrowInvalid, match=arrow_msg):
            ser.str.endswith("kapow", na="baz")
        return

    # Object / python-backed strings: the non-bool ``na`` only warns for now.
    startswith_msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=startswith_msg):
        ser.str.startswith("kapow", na="baz")

    endswith_msg = "Allowing a non-bool 'na' in obj.str.endswith is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=endswith_msg):
        ser.str.endswith("bar", na="baz")
313
+
314
+
266
315
@pytest .mark .parametrize ("pat" , ["foo" , ("foo" , "baz" )])
267
316
@pytest .mark .parametrize ("dtype" , ["object" , "category" ])
268
317
@pytest .mark .parametrize ("null_value" , [None , np .nan , pd .NA ])
0 commit comments