@@ -262,29 +262,123 @@ def str_count(arr, pat, flags=0):
262
262
263
263
def str_contains (arr , pat , case = True , flags = 0 , na = np .nan , regex = True ):
264
264
"""
265
- Return boolean Series/``array`` whether given pattern/regex is
266
- contained in each string in the Series/Index.
265
+ Test if pattern or regex is contained within a string of a Series or Index.
266
+
267
+ Return boolean Series or Index based on whether a given pattern or regex is
268
+ contained within a string of a Series or Index.
267
269
268
270
Parameters
269
271
----------
270
- pat : string
271
- Character sequence or regular expression
272
- case : boolean , default True
273
- If True, case sensitive
272
+ pat : str
273
+ Character sequence or regular expression.
274
+ case : bool , default True
275
+ If True, case sensitive.
274
276
flags : int, default 0 (no flags)
275
- re module flags, e.g. re.IGNORECASE
276
- na : default NaN, fill value for missing values.
277
+ Flags to pass through to the re module, e.g. re.IGNORECASE.
278
+ na : default NaN
279
+ Fill value for missing values.
277
280
regex : bool, default True
278
- If True use re.search, otherwise use Python in operator
281
+ If True, assumes the pat is a regular expression.
282
+
283
+ If False, treats the pat as a literal string.
279
284
280
285
Returns
281
286
-------
282
- contained : Series/array of boolean values
287
+ Series or Index of boolean values
288
+ A Series or Index of boolean values indicating whether the
289
+ given pattern is contained within the string of each element
290
+ of the Series or Index.
283
291
284
292
See Also
285
293
--------
286
294
match : analogous, but stricter, relying on re.match instead of re.search
287
295
296
+ Examples
297
+ --------
298
+
299
+ Returning a Series of booleans using only a literal pattern.
300
+
301
+ >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN])
302
+ >>> s1.str.contains('og', regex=False)
303
+ 0 False
304
+ 1 True
305
+ 2 False
306
+ 3 False
307
+ 4 NaN
308
+ dtype: object
309
+
310
+ Returning an Index of booleans using only a literal pattern.
311
+
312
+ >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN])
313
+ >>> ind.str.contains('23', regex=False)
314
+ Index([False, False, False, True, nan], dtype='object')
315
+
316
+ Specifying case sensitivity using `case`.
317
+
318
+ >>> s1.str.contains('oG', case=True, regex=True)
319
+ 0 False
320
+ 1 False
321
+ 2 False
322
+ 3 False
323
+ 4 NaN
324
+ dtype: object
325
+
326
+ Specifying `na` to be `False` instead of `NaN` replaces NaN values
327
+ with `False`. If Series or Index does not contain NaN values
328
+ the resultant dtype will be `bool`, otherwise, an `object` dtype.
329
+
330
+ >>> s1.str.contains('og', na=False, regex=True)
331
+ 0 False
332
+ 1 True
333
+ 2 False
334
+ 3 False
335
+ 4 False
336
+ dtype: bool
337
+
338
+ Returning `True` for strings that contain either 'house' or 'parrot'.
339
+
340
+ >>> s1.str.contains('house|parrot', regex=True)
341
+ 0 False
342
+ 1 False
343
+ 2 True
344
+ 3 False
345
+ 4 NaN
346
+ dtype: object
347
+
348
+ Ignoring case sensitivity using `flags` with regex.
349
+
350
+ >>> import re
351
+ >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True)
352
+ 0 False
353
+ 1 False
354
+ 2 True
355
+ 3 False
356
+ 4 NaN
357
+ dtype: object
358
+
359
+ Returning any digit using regular expression.
360
+
361
+ >>> s1.str.contains('\d', regex=True)
362
+ 0 False
363
+ 1 False
364
+ 2 False
365
+ 3 True
366
+ 4 NaN
367
+ dtype: object
368
+
369
+ Ensure `pat` is not a literal pattern when `regex` is set to True.
370
+ Note in the following example one might expect only `s2[1]` and `s2[3]` to
371
+ return `True`. However, '.0' as a regex matches any character
372
+ followed by a 0.
373
+
374
+ >>> s2 = pd.Series(['40','40.0','41','41.0','35'])
375
+ >>> s2.str.contains('.0', regex=True)
376
+ 0 True
377
+ 1 True
378
+ 2 False
379
+ 3 True
380
+ 4 False
381
+ dtype: bool
288
382
"""
289
383
if regex :
290
384
if not case :
0 commit comments