From c7e972105b605be7c1f6928cc520cdc059ea081e Mon Sep 17 00:00:00 2001 From: Josh Howes Date: Thu, 8 Sep 2016 05:49:22 -0400 Subject: [PATCH] BUG: fix str.contains for series containing only nan values --- doc/source/whatsnew/v0.19.0.txt | 1 + doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/strings.py | 3 ++- pandas/tests/test_strings.py | 20 ++++++++++++++++++++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index a007500322ed4..e077b126ae975 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1567,3 +1567,4 @@ Bug Fixes - Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`) - Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`) - ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) +- Bug in ``str.contains()`` for series containing only nan values (:issue:`14171`) \ No newline at end of file diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 695e917c76ba0..4aee6f72b1d53 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -81,3 +81,4 @@ Performance Improvements Bug Fixes ~~~~~~~~~ + diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b49761367b9b5..3041b17b99b17 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -165,7 +165,8 @@ def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object): if na_mask: mask = isnull(arr) try: - result = lib.map_infer_mask(arr, f, mask.view(np.uint8)) + convert = not all(mask) + result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert) except (TypeError, AttributeError): def g(x): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 92fa7b976eb0e..4019bbe20ea1a 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2439,6 +2439,26 @@ def test_more_contains(self): True, False, False]) assert_series_equal(result, expected) + def test_contains_nan(self): + # PR #14171 + s = Series([np.nan, np.nan, np.nan], dtype=np.object_) + + result = s.str.contains('foo', na=False) + expected = Series([False, False, False], dtype=np.bool_) + assert_series_equal(result, expected) + + result = s.str.contains('foo', na=True) + expected = Series([True, True, True], dtype=np.bool_) + assert_series_equal(result, expected) + + result = s.str.contains('foo', na="foo") + expected = Series(["foo", "foo", "foo"], dtype=np.object_) + assert_series_equal(result, expected) + + result = s.str.contains('foo') + expected = Series([np.nan, np.nan, np.nan], dtype=np.object_) + assert_series_equal(result, expected) + def test_more_replace(self): # PR #1179 s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA',