From 8cfb15efa6ba874f9d606c78361053c15041f92b Mon Sep 17 00:00:00 2001
From: Mortada Mehyar
Date: Fri, 1 May 2015 13:33:49 -0700
Subject: [PATCH] ENH: support StringMethods index and rindex

---
 doc/source/api.rst              |  2 ++
 doc/source/text.rst             |  2 ++
 doc/source/whatsnew/v0.16.1.txt |  3 +-
 pandas/core/strings.py          | 56 +++++++++++++++++++++++++++++++++
 pandas/tests/test_strings.py    | 47 +++++++++++++++++++++++++++
 5 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 364b3ba04aefb..b708e35f3b6e1 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -536,6 +536,7 @@ strings and apply several methods to it. These can be acccessed like
    Series.str.find
    Series.str.findall
    Series.str.get
+   Series.str.index
    Series.str.join
    Series.str.len
    Series.str.ljust
@@ -548,6 +549,7 @@ strings and apply several methods to it. These can be acccessed like
    Series.str.repeat
    Series.str.replace
    Series.str.rfind
+   Series.str.rindex
    Series.str.rjust
    Series.str.rpartition
    Series.str.rstrip
diff --git a/doc/source/text.rst b/doc/source/text.rst
index bb27fe52ba7a5..4ec041d19ce1b 100644
--- a/doc/source/text.rst
+++ b/doc/source/text.rst
@@ -268,6 +268,8 @@ Method Summary
     :meth:`~Series.str.upper`,Equivalent to ``str.upper``
     :meth:`~Series.str.find`,Equivalent to ``str.find``
     :meth:`~Series.str.rfind`,Equivalent to ``str.rfind``
+    :meth:`~Series.str.index`,Equivalent to ``str.index``
+    :meth:`~Series.str.rindex`,Equivalent to ``str.rindex``
     :meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize``
     :meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase``
     :meth:`~Series.str.normalize`,Return Unicode normal form. Equivalent to ``unicodedata.normalize``
diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index 275cadba3c598..a51a8ebc04535 100755
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -189,12 +189,13 @@ String Methods Enhancements
 
 :ref:`Continuing from v0.16.0 `, following enhancements are performed to make string operation easier.
 
-- Following new methods are accesible via ``.str`` accessor to apply the function to each values. This is intended to make it more consistent with standard methods on strings. (:issue:`9766`, :issue:`9773`, :issue:`10031`)
+- Following new methods are accesible via ``.str`` accessor to apply the function to each values. This is intended to make it more consistent with standard methods on strings. (:issue:`9766`, :issue:`9773`, :issue:`10031`, :issue:`10045`)
 
   ================  ===============  ===============  ===============  ================
   ..                ..               Methods          ..               ..
   ================  ===============  ===============  ===============  ================
   ``capitalize()``  ``swapcase()``   ``normalize()``  ``partition()``  ``rpartition()``
+  ``index()``       ``rindex()``
   ================  ===============  ===============  ===============  ================
 
 
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 9de11f0193743..d92d164acdd4b 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -638,6 +638,26 @@ def str_find(arr, sub, start=0, end=None, side='left'):
     return _na_map(f, arr, dtype=int)
 
 
+def str_index(arr, sub, start=0, end=None, side='left'):
+    if not isinstance(sub, compat.string_types):
+        msg = 'expected a string object, not {0}'
+        raise TypeError(msg.format(type(sub).__name__))
+
+    if side == 'left':
+        method = 'index'
+    elif side == 'right':
+        method = 'rindex'
+    else:  # pragma: no cover
+        raise ValueError('Invalid side')
+
+    if end is None:
+        f = lambda x: getattr(x, method)(sub, start)
+    else:
+        f = lambda x: getattr(x, method)(sub, start, end)
+
+    return _na_map(f, arr, dtype=int)
+
+
 def str_pad(arr, width, side='left', fillchar=' '):
     """
     Pad strings in the Series/Index with an additional character to
@@ -1327,6 +1347,42 @@ def normalize(self, form):
         result = _na_map(f, self.series)
         return self._wrap_result(result)
 
+    _shared_docs['index'] = ("""
+    Return %(side)s indexes in each strings where the substring is
+    fully contained between [start:end]. This is the same as ``str.%(similar)s``
+    except instead of returning -1, it raises a ValueError when the substring
+    is not found. Equivalent to standard ``str.%(method)s``.
+
+    Parameters
+    ----------
+    sub : str
+        Substring being searched
+    start : int
+        Left edge index
+    end : int
+        Right edge index
+
+    Returns
+    -------
+    found : Series/Index of objects
+
+    See Also
+    --------
+    %(also)s
+    """)
+
+    @Appender(_shared_docs['index'] % dict(side='lowest', similar='find', method='index',
+                                           also='rindex : Return highest indexes in each strings'))
+    def index(self, sub, start=0, end=None):
+        result = str_index(self.series, sub, start=start, end=end, side='left')
+        return self._wrap_result(result)
+
+    @Appender(_shared_docs['index'] % dict(side='highest', similar='rfind', method='rindex',
+                                           also='index : Return lowest indexes in each strings'))
+    def rindex(self, sub, start=0, end=None):
+        result = str_index(self.series, sub, start=start, end=end, side='right')
+        return self._wrap_result(result)
+
     _shared_docs['len'] = ("""
     Compute length of each string in the Series/Index.
 
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index b77b52c0e17a1..8cd8ac9f66a1f 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -908,6 +908,53 @@ def test_find_nan(self):
         result = values.str.rfind('EF', 3, 6)
         tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1]))
 
+    def test_index(self):
+        for klass in [Series, Index]:
+            s = klass(['ABCDEFG', 'BCDEFEF', 'DEFGHIJEF', 'EFGHEF'])
+
+            result = s.str.index('EF')
+            tm.assert_array_equal(result, klass([4, 3, 1, 0]))
+            expected = np.array([v.index('EF') for v in s.values])
+            tm.assert_array_equal(result.values, expected)
+
+            result = s.str.rindex('EF')
+            tm.assert_array_equal(result, klass([4, 5, 7, 4]))
+            expected = np.array([v.rindex('EF') for v in s.values])
+            tm.assert_array_equal(result.values, expected)
+
+            result = s.str.index('EF', 3)
+            tm.assert_array_equal(result, klass([4, 3, 7, 4]))
+            expected = np.array([v.index('EF', 3) for v in s.values])
+            tm.assert_array_equal(result.values, expected)
+
+            result = s.str.rindex('EF', 3)
+            tm.assert_array_equal(result, klass([4, 5, 7, 4]))
+            expected = np.array([v.rindex('EF', 3) for v in s.values])
+            tm.assert_array_equal(result.values, expected)
+
+            result = s.str.index('E', 4, 8)
+            tm.assert_array_equal(result, klass([4, 5, 7, 4]))
+            expected = np.array([v.index('E', 4, 8) for v in s.values])
+            tm.assert_array_equal(result.values, expected)
+
+            result = s.str.rindex('E', 0, 5)
+            tm.assert_array_equal(result, klass([4, 3, 1, 4]))
+            expected = np.array([v.rindex('E', 0, 5) for v in s.values])
+            tm.assert_array_equal(result.values, expected)
+
+            with tm.assertRaisesRegexp(ValueError, "substring not found"):
+                result = s.str.index('DE')
+
+            with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"):
+                result = s.str.index(0)
+
+        # test with nan
+        s = Series(['abcb', 'ab', 'bcbe', np.nan])
+        result = s.str.index('b')
+        tm.assert_array_equal(result, Series([1, 1, 0, np.nan]))
+        result = s.str.rindex('b')
+        tm.assert_array_equal(result, Series([3, 1, 2, np.nan]))
+
     def test_pad(self):
         values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])
 