Skip to content

Commit fd738f7

Browse files
committed
ENH: support StringMethods index and rindex
1 parent 3d769c4 commit fd738f7

File tree

5 files changed

+109
-1
lines changed

5 files changed

+109
-1
lines changed

doc/source/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@ strings and apply several methods to it. These can be accessed like
534534
Series.str.find
535535
Series.str.findall
536536
Series.str.get
537+
Series.str.index
537538
Series.str.join
538539
Series.str.len
539540
Series.str.ljust
@@ -544,6 +545,7 @@ strings and apply several methods to it. These can be accessed like
544545
Series.str.repeat
545546
Series.str.replace
546547
Series.str.rfind
548+
Series.str.rindex
547549
Series.str.rjust
548550
Series.str.rstrip
549551
Series.str.slice

doc/source/text.rst

+2
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ Method Summary
266266
:meth:`~Series.str.upper`,Equivalent to ``str.upper``
267267
:meth:`~Series.str.find`,Equivalent to ``str.find``
268268
:meth:`~Series.str.rfind`,Equivalent to ``str.rfind``
269+
:meth:`~Series.str.index`,Equivalent to ``str.index``
270+
:meth:`~Series.str.rindex`,Equivalent to ``str.rindex``
269271
:meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize``
270272
:meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase``
271273
:meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``

doc/source/whatsnew/v0.16.1.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ Highlights include:
2323
Enhancements
2424
~~~~~~~~~~~~
2525

26-
- Added ``StringMethods.capitalize()`` and ``swapcase`` which behave as the same as standard ``str`` (:issue:`9766`)
26+
- Added ``StringMethods.capitalize()`` and ``swapcase`` which behave the same as standard ``str`` (:issue:`9766`)
27+
- Added ``StringMethods.index()`` and ``rindex`` which behave the same as standard ``str`` (:issue:`10045`)
2728
- ``DataFrame.diff`` now takes an ``axis`` parameter that determines the direction of differencing (:issue:`9727`)
2829
- Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`)
2930
- Allow clip, clip_lower, and clip_upper to accept array-like arguments as thresholds (:issue:`6966`). These methods now have an ``axis`` parameter which determines how the Series or DataFrame will be aligned with the threshold(s).

pandas/core/strings.py

+56
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,26 @@ def str_find(arr, sub, start=0, end=None, side='left'):
629629
return _na_map(f, arr, dtype=int)
630630

631631

632+
def str_index(arr, sub, start=0, end=None, side='left'):
    """
    Locate ``sub`` in each element of ``arr`` using ``str.index`` or
    ``str.rindex``.

    Parameters
    ----------
    arr : values handed to ``_na_map``
        Presumably an array-like of strings -- confirm against callers.
    sub : str
        Substring being searched for.
    start : int, default 0
        Left edge of the search window.
    end : int, optional
        Right edge of the search window. When None it is left out of the
        underlying call entirely.
    side : {'left', 'right'}, default 'left'
        'left' dispatches to ``str.index``, 'right' to ``str.rindex``.

    Returns
    -------
    Whatever ``_na_map`` produces for the per-element lookup with
    ``dtype=int``.

    Raises
    ------
    TypeError
        If ``sub`` is not a string.
    ValueError
        If ``side`` is neither 'left' nor 'right'. The underlying string
        method also raises ValueError when ``sub`` is not found.
    """
    # Validate the needle first so a bad type is reported before anything else.
    if not isinstance(sub, compat.string_types):
        msg = 'expected a string object, not {0}'
        raise TypeError(msg.format(type(sub).__name__))

    if not (side == 'left' or side == 'right'):  # pragma: no cover
        raise ValueError('Invalid side')
    finder = 'index' if side == 'left' else 'rindex'

    # Only forward ``end`` when the caller supplied one.
    bounds = (start,) if end is None else (start, end)

    def locate(x):
        return getattr(x, finder)(sub, *bounds)

    return _na_map(locate, arr, dtype=int)
650+
651+
632652
def str_pad(arr, width, side='left', fillchar=' '):
633653
"""
634654
Pad strings in the Series/Index with an additional character to
@@ -1206,6 +1226,42 @@ def rfind(self, sub, start=0, end=None):
12061226
result = str_find(self.series, sub, start=start, end=end, side='right')
12071227
return self._wrap_result(result)
12081228

1229+
# Docstring template shared by ``index`` and ``rindex``; the %(...)s
# placeholders are filled in by the Appender decorators below.
_shared_docs['index'] = ("""
    Return %(side)s indexes in each string where the substring is
    fully contained between [start:end]. This is the same as ``str.%(similar)s``
    except instead of returning -1, it raises a ValueError when the substring
    is not found. Equivalent to standard ``str.%(method)s``.

    Parameters
    ----------
    sub : str
        Substring being searched
    start : int
        Left edge index
    end : int
        Right edge index

    Returns
    -------
    found : Series/Index of objects

    See Also
    --------
    %(also)s
    """)

# Both wrappers delegate to ``str_index``; they differ only in ``side`` and
# in the values substituted into the shared docstring.
@Appender(_shared_docs['index'] % dict(side='lowest', similar='find', method='index',
                                       also='rindex : Return highest indexes in each string'))
def index(self, sub, start=0, end=None):
    result = str_index(self.series, sub, start=start, end=end, side='left')
    return self._wrap_result(result)

@Appender(_shared_docs['index'] % dict(side='highest', similar='rfind', method='rindex',
                                       also='index : Return lowest indexes in each string'))
def rindex(self, sub, start=0, end=None):
    result = str_index(self.series, sub, start=start, end=end, side='right')
    return self._wrap_result(result)
1264+
12091265
_shared_docs['len'] = ("""
12101266
Compute length of each string in the Series/Index.
12111267

pandas/tests/test_strings.py

+47
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,53 @@ def test_find_nan(self):
881881
result = values.str.rfind('EF', 3, 6)
882882
tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1]))
883883

884+
def test_index(self):
    # Each case: (accessor method, positional args, expected positions for
    # the four sample strings below).
    cases = [
        ('index', ('EF',), [4, 3, 1, 0]),
        ('rindex', ('EF',), [4, 5, 7, 4]),
        ('index', ('EF', 3), [4, 3, 7, 4]),
        ('rindex', ('EF', 3), [4, 5, 7, 4]),
        ('index', ('E', 4, 8), [4, 5, 7, 4]),
        ('rindex', ('E', 0, 5), [4, 3, 1, 4]),
    ]

    for klass in [Series, Index]:
        s = klass(['ABCDEFG', 'BCDEFEF', 'DEFGHIJEF', 'EFGHEF'])

        for method, args, positions in cases:
            result = getattr(s.str, method)(*args)
            tm.assert_array_equal(result, klass(positions))
            # Cross-check against the builtin string method of the same name.
            expected = np.array([getattr(v, method)(*args) for v in s.values])
            tm.assert_array_equal(result.values, expected)

        # 'DE' is absent from the last sample string, so the lookup raises.
        with tm.assertRaisesRegexp(ValueError, "substring not found"):
            s.str.index('DE')

        with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"):
            s.str.index(0)

    # test with nan
    s = Series(['abcb', 'ab', 'bcbe', np.nan])
    tm.assert_array_equal(s.str.index('b'), Series([1, 1, 0, np.nan]))
    tm.assert_array_equal(s.str.rindex('b'), Series([3, 1, 2, np.nan]))
930+
884931
def test_pad(self):
885932
values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])
886933

0 commit comments

Comments
 (0)