Skip to content

Commit 6178eaa

Browse files
committed
Merge pull request #10045 from mortada/str_index
ENH: support StringMethods index and rindex
2 parents 8b7c22d + 8cfb15e commit 6178eaa

File tree

5 files changed

+109
-1
lines changed

5 files changed

+109
-1
lines changed

doc/source/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,7 @@ strings and apply several methods to it. These can be acccessed like
536536
Series.str.find
537537
Series.str.findall
538538
Series.str.get
539+
Series.str.index
539540
Series.str.join
540541
Series.str.len
541542
Series.str.ljust
@@ -548,6 +549,7 @@ strings and apply several methods to it. These can be acccessed like
548549
Series.str.repeat
549550
Series.str.replace
550551
Series.str.rfind
552+
Series.str.rindex
551553
Series.str.rjust
552554
Series.str.rpartition
553555
Series.str.rstrip

doc/source/text.rst

+2
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,8 @@ Method Summary
268268
:meth:`~Series.str.upper`,Equivalent to ``str.upper``
269269
:meth:`~Series.str.find`,Equivalent to ``str.find``
270270
:meth:`~Series.str.rfind`,Equivalent to ``str.rfind``
271+
:meth:`~Series.str.index`,Equivalent to ``str.index``
272+
:meth:`~Series.str.rindex`,Equivalent to ``str.rindex``
271273
:meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize``
272274
:meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase``
273275
:meth:`~Series.str.normalize`,Return Unicode normal form. Equivalent to ``unicodedata.normalize``

doc/source/whatsnew/v0.16.1.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,13 @@ String Methods Enhancements
189189
:ref:`Continuing from v0.16.0 <whatsnew_0160.enhancements.string>`, following
190190
enhancements have been made to make string operations easier.
191191

192-
- The following new methods are accessible via the ``.str`` accessor to apply the function to each value. This is intended to make it more consistent with standard methods on strings. (:issue:`9766`, :issue:`9773`, :issue:`10031`)
192+
- The following new methods are accessible via the ``.str`` accessor to apply the function to each value. This is intended to make it more consistent with standard methods on strings. (:issue:`9766`, :issue:`9773`, :issue:`10031`, :issue:`10045`)
193193

194194
================ =============== =============== =============== ================
195195
.. .. Methods .. ..
196196
================ =============== =============== =============== ================
197197
``capitalize()`` ``swapcase()`` ``normalize()`` ``partition()`` ``rpartition()``
198+
``index()`` ``rindex()``
198199
================ =============== =============== =============== ================
199200

200201

pandas/core/strings.py

+56
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,26 @@ def str_find(arr, sub, start=0, end=None, side='left'):
638638
return _na_map(f, arr, dtype=int)
639639

640640

641+
def str_index(arr, sub, start=0, end=None, side='left'):
    """
    Look up ``sub`` in every element of ``arr`` with ``str.index`` or
    ``str.rindex``.

    Parameters
    ----------
    arr : array-like
        Values handed to ``_na_map`` for element-wise processing
    sub : str
        Substring being searched
    start : int, default 0
        Left edge index
    end : int, optional
        Right edge index; when None it is not passed to the string method
    side : {'left', 'right'}, default 'left'
        'left' dispatches to ``index``, 'right' to ``rindex``

    Returns
    -------
    The result of ``_na_map`` applied with ``dtype=int``

    Raises
    ------
    TypeError
        If ``sub`` is not a string
    ValueError
        If ``side`` is neither 'left' nor 'right'
    """
    if not isinstance(sub, compat.string_types):
        raise TypeError(
            'expected a string object, not {0}'.format(type(sub).__name__))

    if side not in ('left', 'right'):  # pragma: no cover
        raise ValueError('Invalid side')
    method = 'index' if side == 'left' else 'rindex'

    # Leave ``end`` out of the call entirely when it was not supplied.
    bounds = (start,) if end is None else (start, end)

    def f(x):
        return getattr(x, method)(sub, *bounds)

    return _na_map(f, arr, dtype=int)
659+
660+
641661
def str_pad(arr, width, side='left', fillchar=' '):
642662
"""
643663
Pad strings in the Series/Index with an additional character to
@@ -1327,6 +1347,42 @@ def normalize(self, form):
13271347
result = _na_map(f, self.series)
13281348
return self._wrap_result(result)
13291349

1350+
# Docstring template shared by ``index`` and ``rindex``. Each method fills in
# %(side)s, %(similar)s, %(method)s and %(also)s when it applies ``Appender``.
_shared_docs['index'] = ("""
    Return %(side)s indexes in each string where the substring is
    fully contained between [start:end]. This is the same as ``str.%(similar)s``
    except instead of returning -1, it raises a ValueError when the substring
    is not found. Equivalent to standard ``str.%(method)s``.

    Parameters
    ----------
    sub : str
        Substring being searched
    start : int, default 0
        Left edge index
    end : int, optional
        Right edge index

    Returns
    -------
    found : Series/Index of integer positions

    See Also
    --------
    %(also)s
    """)
1373+
1374+
# No inline docstring here: ``Appender`` attaches the filled-in shared
# 'index' template as this method's documentation.
@Appender(_shared_docs['index'] % {
    'side': 'lowest',
    'similar': 'find',
    'method': 'index',
    'also': 'rindex : Return highest indexes in each strings',
})
def index(self, sub, start=0, end=None):
    # side='left' makes str_index use ``str.index`` on each element.
    found = str_index(self.series, sub, start=start, end=end, side='left')
    return self._wrap_result(found)
1379+
1380+
# No inline docstring here: ``Appender`` attaches the filled-in shared
# 'index' template as this method's documentation.
@Appender(_shared_docs['index'] % {
    'side': 'highest',
    'similar': 'rfind',
    'method': 'rindex',
    'also': 'index : Return lowest indexes in each strings',
})
def rindex(self, sub, start=0, end=None):
    # side='right' makes str_index use ``str.rindex`` on each element.
    found = str_index(self.series, sub, start=start, end=end, side='right')
    return self._wrap_result(found)
1385+
13301386
_shared_docs['len'] = ("""
13311387
Compute length of each string in the Series/Index.
13321388

pandas/tests/test_strings.py

+47
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,53 @@ def test_find_nan(self):
908908
result = values.str.rfind('EF', 3, 6)
909909
tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1]))
910910

911+
def test_index(self):
    # Each case: (accessor method, positional arguments, expected positions).
    cases = [
        ('index', ('EF',), [4, 3, 1, 0]),
        ('rindex', ('EF',), [4, 5, 7, 4]),
        ('index', ('EF', 3), [4, 3, 7, 4]),
        ('rindex', ('EF', 3), [4, 5, 7, 4]),
        ('index', ('E', 4, 8), [4, 5, 7, 4]),
        ('rindex', ('E', 0, 5), [4, 3, 1, 4]),
    ]

    for klass in [Series, Index]:
        s = klass(['ABCDEFG', 'BCDEFEF', 'DEFGHIJEF', 'EFGHEF'])

        for name, args, positions in cases:
            result = getattr(s.str, name)(*args)
            tm.assert_array_equal(result, klass(positions))
            # Cross-check against the builtin string method of the same name.
            expected = np.array([getattr(v, name)(*args) for v in s.values])
            tm.assert_array_equal(result.values, expected)

        # 'EFGHEF' does not contain 'DE', so the lookup must raise.
        with tm.assertRaisesRegexp(ValueError, "substring not found"):
            s.str.index('DE')

        # A non-string substring is rejected up front.
        with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"):
            s.str.index(0)

    # test with nan
    s = Series(['abcb', 'ab', 'bcbe', np.nan])
    tm.assert_array_equal(s.str.index('b'), Series([1, 1, 0, np.nan]))
    tm.assert_array_equal(s.str.rindex('b'), Series([3, 1, 2, np.nan]))
957+
911958
def test_pad(self):
912959
values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])
913960

0 commit comments

Comments
 (0)