Skip to content

Commit 15aca78

Browse files
committed
ENH: Add StringMethod.find and rfind
1 parent 0efd4b3 commit 15aca78

File tree

5 files changed

+140
-0
lines changed

5 files changed

+140
-0
lines changed

doc/source/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,7 @@ strings and apply several methods to it. These can be accessed like
531531
Series.str.encode
532532
Series.str.endswith
533533
Series.str.extract
534+
Series.str.find
534535
Series.str.findall
535536
Series.str.get
536537
Series.str.join
@@ -542,6 +543,7 @@ strings and apply several methods to it. These can be accessed like
542543
Series.str.pad
543544
Series.str.repeat
544545
Series.str.replace
546+
Series.str.rfind
545547
Series.str.rjust
546548
Series.str.rstrip
547549
Series.str.slice

doc/source/text.rst

+2
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,8 @@ Method Summary
231231
:meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip``
232232
:meth:`~Series.str.lower`,Equivalent to ``str.lower``
233233
:meth:`~Series.str.upper`,Equivalent to ``str.upper``
234+
:meth:`~Series.str.find`,Equivalent to ``str.find``
235+
:meth:`~Series.str.rfind`,Equivalent to ``str.rfind``
234236
:meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``
235237
:meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha``
236238
:meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit``

doc/source/whatsnew/v0.16.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ Enhancements
156156
- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``,
157157
``isupper()``, ``istitle()`` which behave the same as the standard ``str`` methods (:issue:`9282`)
158158

159+
- Added ``StringMethods.find()`` and ``rfind()`` which behave the same as the standard ``str`` methods (:issue:`9386`)
159160

160161
- Added ``StringMethods.isnumeric`` and ``isdecimal`` which behave the same as the standard ``str`` methods (:issue:`9439`)
161162
- Added ``StringMethods.ljust()`` and ``rjust()`` which behave the same as the standard ``str`` methods (:issue:`9352`)

pandas/core/strings.py

+75
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,46 @@ def str_findall(arr, pat, flags=0):
544544
return _na_map(regex.findall, arr)
545545

546546

547+
def str_find(arr, sub, start=0, end=None, side='left'):
    """
    Locate a substring inside every string of an array.

    Each element is searched for ``sub`` within the slice ``[start:end]``
    using ``str.find`` or ``str.rfind``. -1 is reported where the
    substring is not found.

    Parameters
    ----------
    arr : array
        Values to search; forwarded to ``_na_map``
    sub : str
        Substring being searched
    start : int, default 0
        Left edge index
    end : int, optional
        Right edge index. When None, no right edge is passed to the search
    side : {'left', 'right'}, default 'left'
        'left' searches with ``find``, 'right' searches with ``rfind``

    Returns
    -------
    found : array
        Output of ``_na_map`` called with ``dtype=int``

    Raises
    ------
    TypeError
        If ``sub`` is not a string
    ValueError
        If ``side`` is neither 'left' nor 'right'
    """
    if not isinstance(sub, compat.string_types):
        template = 'expected a string object, not {0}'
        raise TypeError(template.format(type(sub).__name__))

    # Plain equality checks (not a dict lookup) so that an unhashable
    # ``side`` still ends up in the ValueError branch.
    method = ('find' if side == 'left'
              else 'rfind' if side == 'right'
              else None)
    if method is None:  # pragma: no cover
        raise ValueError('Invalid side')

    # Only pass ``end`` through when the caller supplied one.
    bounds = (start,) if end is None else (start, end)
    search = lambda x: getattr(x, method)(sub, *bounds)

    return _na_map(search, arr, dtype=int)
585+
586+
547587
def str_pad(arr, width, side='left', fillchar=' '):
548588
"""
549589
Pad strings with an additional character
@@ -1072,6 +1112,41 @@ def get_dummies(self, sep='|'):
10721112
findall = _pat_wrapper(str_findall, flags=True)
10731113
extract = _pat_wrapper(str_extract, flags=True)
10741114

1115+
# Docstring template shared by ``find`` and ``rfind``. Each method fills in
# the %(side)s, %(method)s and %(also)s placeholders with its own wording
# before handing the text to ``Appender``.
_shared_docs['find'] = ("""
1116+
Return %(side)s indexes in each strings where the substring is
1117+
fully contained between [start:end]. Return -1 on failure.
1118+
Equivalent to standard ``str.%(method)s``.
1119+
1120+
Parameters
1121+
----------
1122+
sub : str
1123+
Substring being searched
1124+
start : int
1125+
Left edge index
1126+
end : int
1127+
Right edge index
1128+
1129+
Returns
1130+
-------
1131+
found : array
1132+
1133+
See Also
1134+
--------
1135+
%(also)s
1136+
""")
1137+
1138+
@Appender(_shared_docs['find'] %
          {'side': 'lowest', 'method': 'find',
           'also': 'rfind : Return highest indexes in each strings'})
def find(self, sub, start=0, end=None):
    # Lowest-index variant: delegate to str_find with side='left' and wrap
    # the raw result through _wrap_result.
    return self._wrap_result(
        str_find(self.series, sub, start=start, end=end, side='left'))
1143+
1144+
@Appender(_shared_docs['find'] %
          {'side': 'highest', 'method': 'rfind',
           'also': 'find : Return lowest indexes in each strings'})
def rfind(self, sub, start=0, end=None):
    # Highest-index variant: delegate to str_find with side='right' and wrap
    # the raw result through _wrap_result.
    return self._wrap_result(
        str_find(self.series, sub, start=start, end=end, side='right'))
1149+
10751150
_shared_docs['len'] = ("""
10761151
Compute length of each string in array.
10771152

pandas/tests/test_strings.py

+60
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,8 @@ def test_empty_str_methods(self):
610610
tm.assert_series_equal(empty_str, empty_list.str.join(''))
611611
tm.assert_series_equal(empty_int, empty.str.len())
612612
tm.assert_series_equal(empty_list, empty_list.str.findall('a'))
613+
tm.assert_series_equal(empty_int, empty.str.find('a'))
614+
tm.assert_series_equal(empty_int, empty.str.rfind('a'))
613615
tm.assert_series_equal(empty_str, empty.str.pad(42))
614616
tm.assert_series_equal(empty_str, empty.str.center(42))
615617
tm.assert_series_equal(empty_list, empty.str.split('a'))
@@ -770,6 +772,64 @@ def test_findall(self):
770772
exp = Series([[u('BAD__'), u('BAD')], NA, [], [u('BAD')]])
771773
tm.assert_almost_equal(result, exp)
772774

775+
def test_find(self):
    """Check ``str.find`` / ``str.rfind`` against fixed answers and the builtins."""
    values = Series(['ABCDEFG', 'BCDEFEF', 'DEFGHIJEF', 'EFGHEF', 'XXXX'])

    # (method name, positional start/end bounds, expected indexes)
    cases = [
        ('find', (), [4, 3, 1, 0, -1]),
        ('rfind', (), [4, 5, 7, 4, -1]),
        ('find', (3,), [4, 3, 7, 4, -1]),
        ('rfind', (3,), [4, 5, 7, 4, -1]),
        ('find', (3, 6), [4, 3, -1, 4, -1]),
        ('rfind', (3, 6), [4, 3, -1, 4, -1]),
    ]
    for name, bounds, wanted in cases:
        result = getattr(values.str, name)('EF', *bounds)
        tm.assert_series_equal(result, Series(wanted))
        # Cross-check with the plain ``str`` method applied element by element.
        expected = np.array([getattr(v, name)('EF', *bounds)
                             for v in values.values])
        tm.assert_numpy_array_equal(result.values, expected)

    # A non-string substring must be rejected by both methods.
    for name in ('find', 'rfind'):
        with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"):
            getattr(values.str, name)(0)
812+
813+
def test_find_nan(self):
    """Check that missing values stay NaN in ``str.find`` / ``str.rfind`` results."""
    values = Series(['ABCDEFG', np.nan, 'DEFGHIJEF', np.nan, 'XXXX'])

    # (method name, positional start/end bounds, expected indexes)
    cases = [
        ('find', (), [4, np.nan, 1, np.nan, -1]),
        ('rfind', (), [4, np.nan, 7, np.nan, -1]),
        ('find', (3,), [4, np.nan, 7, np.nan, -1]),
        ('rfind', (3,), [4, np.nan, 7, np.nan, -1]),
        ('find', (3, 6), [4, np.nan, -1, np.nan, -1]),
        ('rfind', (3, 6), [4, np.nan, -1, np.nan, -1]),
    ]
    for name, bounds, wanted in cases:
        result = getattr(values.str, name)('EF', *bounds)
        tm.assert_series_equal(result, Series(wanted))
832+
773833
def test_pad(self):
774834
values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])
775835

0 commit comments

Comments
 (0)