diff --git a/doc/source/api.rst b/doc/source/api.rst index fa898a95e6694..149421bde28c8 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -531,6 +531,7 @@ strings and apply several methods to it. These can be acccessed like Series.str.encode Series.str.endswith Series.str.extract + Series.str.find Series.str.findall Series.str.get Series.str.join @@ -542,6 +543,7 @@ strings and apply several methods to it. These can be acccessed like Series.str.pad Series.str.repeat Series.str.replace + Series.str.rfind Series.str.rjust Series.str.rstrip Series.str.slice diff --git a/doc/source/text.rst b/doc/source/text.rst index debf24f21c735..af32549893dde 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -231,6 +231,8 @@ Method Summary :meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip`` :meth:`~Series.str.lower`,Equivalent to ``str.lower`` :meth:`~Series.str.upper`,Equivalent to ``str.upper`` + :meth:`~Series.str.find`,Equivalent to ``str.find`` + :meth:`~Series.str.rfind`,Equivalent to ``str.rfind`` :meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum`` :meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha`` :meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit`` diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 238a838cf727e..247be953983d5 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -156,6 +156,7 @@ Enhancements - Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``, ``isupper()``, ``istitle()`` which behave as the same as standard ``str`` (:issue:`9282`) +- Added ``StringMethods.find()`` and ``rfind()`` which behave the same as standard ``str`` (:issue:`9386`) - Added ``StringMethods.isnumeric`` and ``isdecimal`` which behave as the same as standard ``str`` (:issue:`9439`) - Added ``StringMethods.ljust()`` and ``rjust()`` which behave as the same as standard ``str`` (:issue:`9352`) diff --git a/pandas/core/strings.py
b/pandas/core/strings.py
index 605f3a42651fb..93ad2066d0e12 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -544,6 +544,57 @@ def str_findall(arr, pat, flags=0):
     return _na_map(regex.findall, arr)
 
 
+def str_find(arr, sub, start=0, end=None, side='left'):
+    """
+    Return the lowest (``side='left'``) or highest (``side='right'``) index
+    in each string where the substring is fully contained between
+    [start:end]. Return -1 on failure.
+
+    Parameters
+    ----------
+    sub : str
+        Substring being searched
+    start : int
+        Left edge index
+    end : int
+        Right edge index
+    side : {'left', 'right'}, default 'left'
+        Specifies a starting side, equivalent to ``find`` or ``rfind``
+
+    Returns
+    -------
+    found : array
+
+    Raises
+    ------
+    TypeError
+        If ``sub`` is not a string
+    ValueError
+        If ``side`` is neither 'left' nor 'right'
+    """
+
+    if not isinstance(sub, compat.string_types):
+        msg = 'expected a string object, not {0}'
+        raise TypeError(msg.format(type(sub).__name__))
+
+    if side == 'left':
+        method = 'find'
+    elif side == 'right':
+        method = 'rfind'
+    else:  # pragma: no cover
+        raise ValueError('Invalid side: {0!r}'.format(side))
+
+    # Only forward ``end`` when the caller supplied one, so the builtin
+    # method is invoked exactly as ``x.find(sub, start)`` otherwise.
+    if end is None:
+        f = lambda x: getattr(x, method)(sub, start)
+    else:
+        f = lambda x: getattr(x, method)(sub, start, end)
+
+    # Missing values stay missing in the result (see test_find_nan).
+    return _na_map(f, arr, dtype=int)
+
+
 def str_pad(arr, width, side='left', fillchar=' '):
     """
     Pad strings with an additional character
@@ -1072,6 +1123,41 @@ def get_dummies(self, sep='|'):
     findall = _pat_wrapper(str_findall, flags=True)
     extract = _pat_wrapper(str_extract, flags=True)
 
+    _shared_docs['find'] = ("""
+    Return %(side)s index in each string where the substring is
+    fully contained between [start:end]. Return -1 on failure.
+    Equivalent to standard ``str.%(method)s``.
+
+    Parameters
+    ----------
+    sub : str
+        Substring being searched
+    start : int
+        Left edge index
+    end : int
+        Right edge index
+
+    Returns
+    -------
+    found : array
+
+    See Also
+    --------
+    %(also)s
+    """)
+
+    @Appender(_shared_docs['find'] % dict(side='lowest', method='find',
+              also='rfind : Return highest index in each string'))
+    def find(self, sub, start=0, end=None):
+        result = str_find(self.series, sub, start=start, end=end, side='left')
+        return self._wrap_result(result)
+
+    @Appender(_shared_docs['find'] % dict(side='highest', method='rfind',
+              also='find : Return lowest index in each string'))
+    def rfind(self, sub, start=0, end=None):
+        result = str_find(self.series, sub, start=start, end=end, side='right')
+        return self._wrap_result(result)
+
     _shared_docs['len'] = ("""
     Compute length of each string in array.
 
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 0d9875fb9d4b1..d89ff98010d13 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -610,6 +610,8 @@ def test_empty_str_methods(self):
         tm.assert_series_equal(empty_str, empty_list.str.join(''))
         tm.assert_series_equal(empty_int, empty.str.len())
         tm.assert_series_equal(empty_list, empty_list.str.findall('a'))
+        tm.assert_series_equal(empty_int, empty.str.find('a'))
+        tm.assert_series_equal(empty_int, empty.str.rfind('a'))
         tm.assert_series_equal(empty_str, empty.str.pad(42))
         tm.assert_series_equal(empty_str, empty.str.center(42))
         tm.assert_series_equal(empty_list, empty.str.split('a'))
@@ -770,6 +772,64 @@ def test_findall(self):
         exp = Series([[u('BAD__'), u('BAD')], NA, [], [u('BAD')]])
         tm.assert_almost_equal(result, exp)
 
+    def test_find(self):
+        values = Series(['ABCDEFG', 'BCDEFEF', 'DEFGHIJEF', 'EFGHEF', 'XXXX'])
+        result = values.str.find('EF')
+        tm.assert_series_equal(result, Series([4, 3, 1, 0, -1]))
+        expected = np.array([v.find('EF') for v in values.values])
+        tm.assert_numpy_array_equal(result.values, expected)
+
+        result =
values.str.rfind('EF')
+        tm.assert_series_equal(result, Series([4, 5, 7, 4, -1]))
+        expected = np.array([v.rfind('EF') for v in values.values])
+        tm.assert_numpy_array_equal(result.values, expected)
+
+        result = values.str.find('EF', 3)
+        tm.assert_series_equal(result, Series([4, 3, 7, 4, -1]))
+        expected = np.array([v.find('EF', 3) for v in values.values])
+        tm.assert_numpy_array_equal(result.values, expected)
+
+        result = values.str.rfind('EF', 3)
+        tm.assert_series_equal(result, Series([4, 5, 7, 4, -1]))
+        expected = np.array([v.rfind('EF', 3) for v in values.values])
+        tm.assert_numpy_array_equal(result.values, expected)
+
+        result = values.str.find('EF', 3, 6)
+        tm.assert_series_equal(result, Series([4, 3, -1, 4, -1]))
+        expected = np.array([v.find('EF', 3, 6) for v in values.values])
+        tm.assert_numpy_array_equal(result.values, expected)
+
+        result = values.str.rfind('EF', 3, 6)
+        tm.assert_series_equal(result, Series([4, 3, -1, 4, -1]))
+        expected = np.array([v.rfind('EF', 3, 6) for v in values.values])
+        tm.assert_numpy_array_equal(result.values, expected)
+
+        with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"):
+            values.str.find(0)
+
+        with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"):
+            values.str.rfind(0)
+
+    def test_find_nan(self):
+        values = Series(['ABCDEFG', np.nan, 'DEFGHIJEF', np.nan, 'XXXX'])
+        result = values.str.find('EF')
+        tm.assert_series_equal(result, Series([4, np.nan, 1, np.nan, -1]))
+
+        result = values.str.rfind('EF')
+        tm.assert_series_equal(result, Series([4, np.nan, 7, np.nan, -1]))
+
+        result = values.str.find('EF', 3)
+        tm.assert_series_equal(result, Series([4, np.nan, 7, np.nan, -1]))
+
+        result = values.str.rfind('EF', 3)
+        tm.assert_series_equal(result, Series([4, np.nan, 7, np.nan, -1]))
+
+        result = values.str.find('EF', 3, 6)
+        tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1]))
+
+        result = values.str.rfind('EF', 3, 6)
+        tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1]))
+
     def test_pad(self):
         values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])
 