Skip to content

ENH: Add StringMethod.find and rfind #9386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ strings and apply several methods to it. These can be acccessed like
Series.str.encode
Series.str.endswith
Series.str.extract
Series.str.find
Series.str.findall
Series.str.get
Series.str.join
Expand All @@ -542,6 +543,7 @@ strings and apply several methods to it. These can be acccessed like
Series.str.pad
Series.str.repeat
Series.str.replace
Series.str.rfind
Series.str.rjust
Series.str.rstrip
Series.str.slice
Expand Down
2 changes: 2 additions & 0 deletions doc/source/text.rst
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ Method Summary
:meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip``
:meth:`~Series.str.lower`,Equivalent to ``str.lower``
:meth:`~Series.str.upper`,Equivalent to ``str.upper``
:meth:`~Series.str.find`,Equivalent to ``str.find``
:meth:`~Series.str.rfind`,Equivalent to ``str.rfind``
:meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``
:meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha``
:meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit``
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ Enhancements
- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``,
``isupper()``, ``istitle()`` which behave as the same as standard ``str`` (:issue:`9282`)

- Added ``StringMethods.find()`` and ``rfind()`` which behave as the same as standard ``str`` (:issue:`9386`)

- Added ``StringMethods.isnumeric`` and ``isdecimal`` which behave as the same as standard ``str`` (:issue:`9439`)
- Added ``StringMethods.ljust()`` and ``rjust()`` which behave as the same as standard ``str`` (:issue:`9352`)
Expand Down
75 changes: 75 additions & 0 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,46 @@ def str_findall(arr, pat, flags=0):
return _na_map(regex.findall, arr)


def str_find(arr, sub, start=0, end=None, side='left'):
"""
Return indexes in each strings where the substring is
fully contained between [start:end]. Return -1 on failure.
Parameters
----------
sub : str
Substring being searched
start : int
Left edge index
end : int
Right edge index
side : {'left', 'right'}, default 'left'
Specifies a starting side, equivalent to ``find`` or ``rfind``
Returns
-------
found : array
"""

if not isinstance(sub, compat.string_types):
msg = 'expected a string object, not {0}'
raise TypeError(msg.format(type(sub).__name__))

if side == 'left':
method = 'find'
elif side == 'right':
method = 'rfind'
else: # pragma: no cover
raise ValueError('Invalid side')

if end is None:
f = lambda x: getattr(x, method)(sub, start)
else:
f = lambda x: getattr(x, method)(sub, start, end)

return _na_map(f, arr, dtype=int)


def str_pad(arr, width, side='left', fillchar=' '):
"""
Pad strings with an additional character
Expand Down Expand Up @@ -1072,6 +1112,41 @@ def get_dummies(self, sep='|'):
findall = _pat_wrapper(str_findall, flags=True)
extract = _pat_wrapper(str_extract, flags=True)

_shared_docs['find'] = ("""
Return %(side)s indexes in each strings where the substring is
fully contained between [start:end]. Return -1 on failure.
Equivalent to standard ``str.%(method)s``.
Parameters
----------
sub : str
Substring being searched
start : int
Left edge index
end : int
Right edge index
Returns
-------
found : array
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a "See also" here pointing to the other function (find <-> rfind) saying shortly what the difference is?

See Also
--------
%(also)s
""")

@Appender(_shared_docs['find'] % dict(side='lowest', method='find',
also='rfind : Return highest indexes in each strings'))
def find(self, sub, start=0, end=None):
result = str_find(self.series, sub, start=start, end=end, side='left')
return self._wrap_result(result)

@Appender(_shared_docs['find'] % dict(side='highest', method='rfind',
also='find : Return lowest indexes in each strings'))
def rfind(self, sub, start=0, end=None):
result = str_find(self.series, sub, start=start, end=end, side='right')
return self._wrap_result(result)

_shared_docs['len'] = ("""
Compute length of each string in array.
Expand Down
60 changes: 60 additions & 0 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,8 @@ def test_empty_str_methods(self):
tm.assert_series_equal(empty_str, empty_list.str.join(''))
tm.assert_series_equal(empty_int, empty.str.len())
tm.assert_series_equal(empty_list, empty_list.str.findall('a'))
tm.assert_series_equal(empty_int, empty.str.find('a'))
tm.assert_series_equal(empty_int, empty.str.rfind('a'))
tm.assert_series_equal(empty_str, empty.str.pad(42))
tm.assert_series_equal(empty_str, empty.str.center(42))
tm.assert_series_equal(empty_list, empty.str.split('a'))
Expand Down Expand Up @@ -770,6 +772,64 @@ def test_findall(self):
exp = Series([[u('BAD__'), u('BAD')], NA, [], [u('BAD')]])
tm.assert_almost_equal(result, exp)

def test_find(self):
values = Series(['ABCDEFG', 'BCDEFEF', 'DEFGHIJEF', 'EFGHEF', 'XXXX'])
result = values.str.find('EF')
tm.assert_series_equal(result, Series([4, 3, 1, 0, -1]))
expected = np.array([v.find('EF') for v in values.values])
tm.assert_numpy_array_equal(result.values, expected)

result = values.str.rfind('EF')
tm.assert_series_equal(result, Series([4, 5, 7, 4, -1]))
expected = np.array([v.rfind('EF') for v in values.values])
tm.assert_numpy_array_equal(result.values, expected)

result = values.str.find('EF', 3)
tm.assert_series_equal(result, Series([4, 3, 7, 4, -1]))
expected = np.array([v.find('EF', 3) for v in values.values])
tm.assert_numpy_array_equal(result.values, expected)

result = values.str.rfind('EF', 3)
tm.assert_series_equal(result, Series([4, 5, 7, 4, -1]))
expected = np.array([v.rfind('EF', 3) for v in values.values])
tm.assert_numpy_array_equal(result.values, expected)

result = values.str.find('EF', 3, 6)
tm.assert_series_equal(result, Series([4, 3, -1, 4, -1]))
expected = np.array([v.find('EF', 3, 6) for v in values.values])
tm.assert_numpy_array_equal(result.values, expected)

result = values.str.rfind('EF', 3, 6)
tm.assert_series_equal(result, Series([4, 3, -1, 4, -1]))
expected = np.array([v.rfind('EF', 3, 6) for v in values.values])
tm.assert_numpy_array_equal(result.values, expected)

with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"):
result = values.str.find(0)

with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"):
result = values.str.rfind(0)

def test_find_nan(self):
values = Series(['ABCDEFG', np.nan, 'DEFGHIJEF', np.nan, 'XXXX'])
result = values.str.find('EF')
tm.assert_series_equal(result, Series([4, np.nan, 1, np.nan, -1]))

result = values.str.rfind('EF')
tm.assert_series_equal(result, Series([4, np.nan, 7, np.nan, -1]))

result = values.str.find('EF', 3)
tm.assert_series_equal(result, Series([4, np.nan, 7, np.nan, -1]))

result = values.str.rfind('EF', 3)
tm.assert_series_equal(result, Series([4, np.nan, 7, np.nan, -1]))

result = values.str.find('EF', 3, 6)
tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1]))

result = values.str.rfind('EF', 3, 6)
tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1]))

def test_pad(self):
values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])

Expand Down