Skip to content

Commit 80080fb

Browse files
committed
ENH: support StringMethods index and rindex
1 parent 2e087c7 commit 80080fb

File tree

5 files changed

+109
-1
lines changed

5 files changed

+109
-1
lines changed

doc/source/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,7 @@ strings and apply several methods to it. These can be accessed like
536536
Series.str.find
537537
Series.str.findall
538538
Series.str.get
539+
Series.str.index
539540
Series.str.join
540541
Series.str.len
541542
Series.str.ljust
@@ -547,6 +548,7 @@ strings and apply several methods to it. These can be accessed like
547548
Series.str.repeat
548549
Series.str.replace
549550
Series.str.rfind
551+
Series.str.rindex
550552
Series.str.rjust
551553
Series.str.rstrip
552554
Series.str.slice

doc/source/text.rst

+2
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ Method Summary
266266
:meth:`~Series.str.upper`,Equivalent to ``str.upper``
267267
:meth:`~Series.str.find`,Equivalent to ``str.find``
268268
:meth:`~Series.str.rfind`,Equivalent to ``str.rfind``
269+
:meth:`~Series.str.index`,Equivalent to ``str.index``
270+
:meth:`~Series.str.rindex`,Equivalent to ``str.rindex``
269271
:meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize``
270272
:meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase``
271273
:meth:`~Series.str.normalize`,Return Unicode normal form. Equivalent to ``unicodedata.normalize``

doc/source/whatsnew/v0.16.1.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ Enhancements
3737
Timestamp('2014-08-01 07:00') + BusinessHour()
3838
Timestamp('2014-08-01 16:30') + BusinessHour()
3939

40-
- Added ``StringMethods.capitalize()`` and ``swapcase`` which behave as the same as standard ``str`` (:issue:`9766`)
40+
- Added ``StringMethods.capitalize()`` and ``swapcase`` which behave the same as standard ``str`` (:issue:`9766`)
41+
- Added ``StringMethods.index()`` and ``rindex`` which behave the same as standard ``str`` (:issue:`10045`)
4142
- ``DataFrame.diff`` now takes an ``axis`` parameter that determines the direction of differencing (:issue:`9727`)
4243
- Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`)
4344
- Added ``StringMethods.normalize()`` which behaves the same as standard :func:`unicodedata.normalize` (:issue:`10031`)

pandas/core/strings.py

+56
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,26 @@ def str_find(arr, sub, start=0, end=None, side='left'):
629629
return _na_map(f, arr, dtype=int)
630630

631631

632+
def str_index(arr, sub, start=0, end=None, side='left'):
    """
    Locate ``sub`` in every element of ``arr`` using ``str.index`` or
    ``str.rindex``.

    Parameters
    ----------
    arr : array-like
        Values handed to ``_na_map`` together with the per-element search.
    sub : str
        Substring being searched.
    start : int, default 0
        Left edge index passed to the underlying string method.
    end : int, optional
        Right edge index; when None, only ``start`` is passed on.
    side : {'left', 'right'}, default 'left'
        'left' selects ``index``, 'right' selects ``rindex``.

    Returns
    -------
    The result of ``_na_map`` called with ``dtype=int``.

    Raises
    ------
    TypeError
        If ``sub`` is not a string.
    ValueError
        If ``side`` is neither 'left' nor 'right'.
    """
    if not isinstance(sub, compat.string_types):
        raise TypeError(
            'expected a string object, not {0}'.format(type(sub).__name__))

    if side not in ('left', 'right'):  # pragma: no cover
        raise ValueError('Invalid side')
    method = 'index' if side == 'left' else 'rindex'

    # Only forward ``end`` when the caller supplied one, mirroring the
    # two- and three-argument forms of str.index / str.rindex.
    bounds = (start,) if end is None else (start, end)

    def locate(text):
        return getattr(text, method)(sub, *bounds)

    return _na_map(locate, arr, dtype=int)
650+
651+
632652
def str_pad(arr, width, side='left', fillchar=' '):
633653
"""
634654
Pad strings in the Series/Index with an additional character to
@@ -1225,6 +1245,42 @@ def normalize(self, form):
12251245
result = _na_map(f, self.series)
12261246
return self._wrap_result(result)
12271247

1248+
# Docstring template shared by ``index`` and ``rindex``; it is filled in
# through %-formatting with the keys ``side``, ``similar``, ``method`` and
# ``also`` before being attached to each method.
_shared_docs['index'] = ("""
Return %(side)s indexes in each string where the substring is
fully contained between [start:end]. This is the same as ``str.%(similar)s``
except instead of returning -1, it raises a ValueError when the substring
is not found. Equivalent to standard ``str.%(method)s``.

Parameters
----------
sub : str
    Substring being searched
start : int
    Left edge index
end : int
    Right edge index

Returns
-------
found : Series/Index of objects

See Also
--------
%(also)s
""")
1271+
1272+
@Appender(_shared_docs['index'] %
          dict(side='lowest', similar='find', method='index',
               also='rindex : Return highest indexes in each string'))
def index(self, sub, start=0, end=None):
    # No inline docstring on purpose: the documentation comes from the
    # shared template attached by the decorator above.
    # side='left' makes str_index use ``str.index`` on each element.
    result = str_index(self.series, sub, start=start, end=end, side='left')
    return self._wrap_result(result)
1277+
1278+
@Appender(_shared_docs['index'] %
          dict(side='highest', similar='rfind', method='rindex',
               also='index : Return lowest indexes in each string'))
def rindex(self, sub, start=0, end=None):
    # No inline docstring on purpose: the documentation comes from the
    # shared template attached by the decorator above.
    # side='right' makes str_index use ``str.rindex`` on each element.
    result = str_index(self.series, sub, start=start, end=end, side='right')
    return self._wrap_result(result)
1283+
12281284
_shared_docs['len'] = ("""
12291285
Compute length of each string in the Series/Index.
12301286

pandas/tests/test_strings.py

+47
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,53 @@ def test_find_nan(self):
882882
result = values.str.rfind('EF', 3, 6)
883883
tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1]))
884884

885+
def test_index(self):
    # Each case: (accessor method, positional arguments, expected positions).
    # The expected positions are also cross-checked against the builtin
    # str method of the same name applied to every value.
    cases = [
        ('index', ('EF',), [4, 3, 1, 0]),
        ('rindex', ('EF',), [4, 5, 7, 4]),
        ('index', ('EF', 3), [4, 3, 7, 4]),
        ('rindex', ('EF', 3), [4, 5, 7, 4]),
        ('index', ('E', 4, 8), [4, 5, 7, 4]),
        ('rindex', ('E', 0, 5), [4, 3, 1, 4]),
    ]

    for klass in [Series, Index]:
        s = klass(['ABCDEFG', 'BCDEFEF', 'DEFGHIJEF', 'EFGHEF'])

        for name, args, positions in cases:
            result = getattr(s.str, name)(*args)
            tm.assert_array_equal(result, klass(positions))
            expected = np.array([getattr(v, name)(*args) for v in s.values])
            tm.assert_array_equal(result.values, expected)

        # A substring missing from any element propagates str.index's error.
        with tm.assertRaisesRegexp(ValueError, "substring not found"):
            s.str.index('DE')

        # Non-string needles are rejected up front.
        with tm.assertRaisesRegexp(TypeError,
                                   "expected a string object, not int"):
            s.str.index(0)

    # test with nan
    s = Series(['abcb', 'ab', 'bcbe', np.nan])
    result = s.str.index('b')
    tm.assert_array_equal(result, Series([1, 1, 0, np.nan]))
    result = s.str.rindex('b')
    tm.assert_array_equal(result, Series([3, 1, 2, np.nan]))
931+
885932
def test_pad(self):
886933
values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])
887934

0 commit comments

Comments
 (0)