Skip to content

Commit e44663a

Browse files
committed
ENH: support StringMethods index and rindex
1 parent 2cf4132 commit e44663a

File tree

4 files changed

+103
-4
lines changed

4 files changed

+103
-4
lines changed

doc/source/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@ strings and apply several methods to it. These can be acccessed like
534534
Series.str.find
535535
Series.str.findall
536536
Series.str.get
537+
Series.str.index
537538
Series.str.join
538539
Series.str.len
539540
Series.str.ljust
@@ -544,6 +545,7 @@ strings and apply several methods to it. These can be acccessed like
544545
Series.str.repeat
545546
Series.str.replace
546547
Series.str.rfind
548+
Series.str.rindex
547549
Series.str.rjust
548550
Series.str.rstrip
549551
Series.str.slice

doc/source/text.rst

+2
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ Method Summary
266266
:meth:`~Series.str.upper`,Equivalent to ``str.upper``
267267
:meth:`~Series.str.find`,Equivalent to ``str.find``
268268
:meth:`~Series.str.rfind`,Equivalent to ``str.rfind``
269+
:meth:`~Series.str.index`,Equivalent to ``str.index``
270+
:meth:`~Series.str.rindex`,Equivalent to ``str.rindex``
269271
:meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize``
270272
:meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase``
271273
:meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``

pandas/core/strings.py

+52-4
Original file line numberDiff line numberDiff line change
@@ -576,11 +576,23 @@ def str_find(arr, sub, start=0, end=None, side='left'):
576576
else: # pragma: no cover
577577
raise ValueError('Invalid side')
578578

579-
if end is None:
580-
f = lambda x: getattr(x, method)(sub, start)
581-
else:
582-
f = lambda x: getattr(x, method)(sub, start, end)
579+
f = lambda x: getattr(x, method)(sub, start, end)
580+
return _na_map(f, arr, dtype=int)
581+
582+
583+
def str_index(arr, sub, start=0, end=None, side='left'):
    """
    Find ``sub`` in each element of ``arr`` using ``str.index`` or
    ``str.rindex``.

    Parameters
    ----------
    arr : values to search
        Passed through to ``_na_map`` together with the lookup function.
    sub : str
        Substring being searched.
    start : int, default 0
        Left edge index, forwarded to the string method.
    end : int, default None
        Right edge index, forwarded to the string method.
    side : {'left', 'right'}, default 'left'
        'left' selects ``index``; 'right' selects ``rindex``.

    Returns
    -------
    Whatever ``_na_map`` produces for the lookup function with
    ``dtype=int``.

    Raises
    ------
    TypeError
        If ``sub`` is not a string.
    ValueError
        If ``side`` is neither 'left' nor 'right'.
    """
    # Reject a non-string needle up front with an explicit message.
    if not isinstance(sub, compat.string_types):
        raise TypeError(
            'expected a string object, not {0}'.format(type(sub).__name__))

    if side not in ('left', 'right'):  # pragma: no cover
        raise ValueError('Invalid side')
    method = 'index' if side == 'left' else 'rindex'

    def locate(value):
        # The method is looked up on each element and always receives
        # the explicit (start, end) pair.
        return getattr(value, method)(sub, start, end)

    return _na_map(locate, arr, dtype=int)
585597

586598

@@ -1161,6 +1173,42 @@ def rfind(self, sub, start=0, end=None):
11611173
result = str_find(self.series, sub, start=start, end=end, side='right')
11621174
return self._wrap_result(result)
11631175

1176+
# Docstring template shared by ``index`` and ``rindex``; the %(...)s
# placeholders are filled in by each method's Appender call below.
_shared_docs['index'] = ("""
    Return %(side)s indexes in each string where the substring is
    fully contained between [start:end]. This is the same as
    ``str.%(similar)s`` except instead of returning -1, it raises a
    ValueError when the substring is not found. Equivalent to standard
    ``str.%(method)s``.

    Parameters
    ----------
    sub : str
        Substring being searched
    start : int, default 0
        Left edge index
    end : int, default None
        Right edge index

    Returns
    -------
    found : array

    Raises
    ------
    ValueError
        If the substring is not found in one of the strings
    TypeError
        If ``sub`` is not a string

    See Also
    --------
    %(also)s
    """)

# No inline docstring on purpose: the documentation for this method is
# the shared template applied through Appender.
@Appender(_shared_docs['index'] %
          dict(side='lowest', similar='find', method='index',
               also='rindex : Return highest indexes in each string'))
def index(self, sub, start=0, end=None):
    # side='left' makes str_index use ``str.index``.
    result = str_index(self.series, sub, start=start, end=end, side='left')
    return self._wrap_result(result)

# No inline docstring on purpose: the documentation for this method is
# the shared template applied through Appender.
@Appender(_shared_docs['index'] %
          dict(side='highest', similar='rfind', method='rindex',
               also='index : Return lowest indexes in each string'))
def rindex(self, sub, start=0, end=None):
    # side='right' makes str_index use ``str.rindex``.
    result = str_index(self.series, sub, start=start, end=end, side='right')
    return self._wrap_result(result)
1211+
11641212
_shared_docs['len'] = ("""
11651213
Compute length of each string in array.
11661214

pandas/tests/test_strings.py

+47
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,53 @@ def test_find_nan(self):
881881
result = values.str.rfind('EF', 3, 6)
882882
tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1]))
883883

884+
def test_index(self):
    # Check Series.str.index / rindex (and the Index equivalents) against
    # hard-coded positions and against the builtin str methods.
    strings = ['ABCDEFG', 'BCDEFEF', 'DEFGHIJEF', 'EFGHEF']

    # (accessor method, positional arguments, expected positions)
    cases = [
        ('index', ('EF',), [4, 3, 1, 0]),
        ('rindex', ('EF',), [4, 5, 7, 4]),
        ('index', ('EF', 3), [4, 3, 7, 4]),
        ('rindex', ('EF', 3), [4, 5, 7, 4]),
        ('index', ('E', 4, 8), [4, 5, 7, 4]),
        ('rindex', ('E', 0, 5), [4, 3, 1, 4]),
    ]
    type_msg = "expected a string object, not int"

    for klass in [Series, Index]:
        s = klass(strings)

        for name, args, positions in cases:
            result = getattr(s.str, name)(*args)
            tm.assert_array_equal(result, klass(positions))

            # Cross-check against the builtin method applied per element.
            expected = np.array([getattr(v, name)(*args) for v in s.values])
            tm.assert_array_equal(result.values, expected)

        # 'DE' is missing from the last string, so the lookup must raise.
        with tm.assertRaisesRegexp(ValueError, "substring not found"):
            s.str.index('DE')

        # A non-string needle is rejected before any lookup happens.
        with tm.assertRaisesRegexp(TypeError, type_msg):
            s.str.index(0)

    # test with nan
    s = Series(['abcb', 'ab', 'bcbe', np.nan])
    tm.assert_array_equal(s.str.index('b'), Series([1, 1, 0, np.nan]))
    tm.assert_array_equal(s.str.rindex('b'), Series([3, 1, 2, np.nan]))
930+
884931
def test_pad(self):
885932
values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])
886933

0 commit comments

Comments
 (0)