Skip to content

ENH: StringMethods supports is_xxx methods #9282

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 29, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,13 @@ strings and apply several methods to it. These can be accessed like
Series.str.strip
Series.str.title
Series.str.upper
Series.str.isalnum
Series.str.isalpha
Series.str.isdigit
Series.str.isspace
Series.str.islower
Series.str.isupper
Series.str.istitle
Series.str.get_dummies

.. _api.categorical:
Expand Down
7 changes: 7 additions & 0 deletions doc/source/text.rst
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,10 @@ Method Summary
:meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip``
:meth:`~Series.str.lower`,Equivalent to ``str.lower``
:meth:`~Series.str.upper`,Equivalent to ``str.upper``
:meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``
:meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha``
:meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit``
:meth:`~Series.str.isspace`,Equivalent to ``str.isspace``
:meth:`~Series.str.islower`,Equivalent to ``str.islower``
:meth:`~Series.str.isupper`,Equivalent to ``str.isupper``
:meth:`~Series.str.istitle`,Equivalent to ``str.istitle``
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,12 @@ Enhancements
- Added ``Series.str.slice_replace()``, which previously raised NotImplementedError (:issue:`8888`)
- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`)
- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`)

- ``Timedelta`` will now accept nanoseconds keyword in constructor (:issue:`9273`)
- SQL code now safely escapes table and column names (:issue:`8986`)

- Added auto-complete for ``Series.str.<tab>``, ``Series.dt.<tab>`` and ``Series.cat.<tab>`` (:issue:`9322`)
- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``,
``isupper()``, ``istitle()`` which behave the same as the standard ``str`` methods (:issue:`9282`)

Performance
~~~~~~~~~~~
Expand Down
108 changes: 55 additions & 53 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import textwrap


_shared_docs = dict()


def _get_array_list(arr, others):
from pandas.core.series import Series

Expand Down Expand Up @@ -124,17 +127,6 @@ def g(x):
return lib.map_infer(arr, f)


def str_title(arr):
    """
    Titlecase every string in the array.

    The conversion is delegated to ``_na_map``, which applies the
    per-element function across ``arr``.

    Returns
    -------
    titled : array
    """
    def _to_title(x):
        return x.title()

    return _na_map(_to_title, arr)


def str_count(arr, pat, flags=0):
"""
Count occurrences of pattern in each string
Expand Down Expand Up @@ -197,7 +189,8 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
else:
upper_pat = pat.upper()
f = lambda x: upper_pat in x
return _na_map(f, str_upper(arr), na, dtype=bool)
uppered = _na_map(lambda x: x.upper(), arr)
return _na_map(f, uppered, na, dtype=bool)
return _na_map(f, arr, na, dtype=bool)


Expand Down Expand Up @@ -239,28 +232,6 @@ def str_endswith(arr, pat, na=np.nan):
return _na_map(f, arr, na, dtype=bool)


def str_lower(arr):
    """
    Lowercase every string in the array.

    The conversion is delegated to ``_na_map``, which applies the
    per-element function across ``arr``.

    Returns
    -------
    lowercase : array
    """
    def _to_lower(x):
        return x.lower()

    return _na_map(_to_lower, arr)


def str_upper(arr):
    """
    Uppercase every string in the array.

    The conversion is delegated to ``_na_map``, which applies the
    per-element function across ``arr``.

    Returns
    -------
    uppercase : array
    """
    def _to_upper(x):
        return x.upper()

    return _na_map(_to_upper, arr)


def str_replace(arr, pat, repl, n=-1, case=True, flags=0):
"""
Replace
Expand Down Expand Up @@ -553,17 +524,6 @@ def str_join(arr, sep):
return _na_map(sep.join, arr)


def str_len(arr):
    """
    Compute the length of each string in the array.

    ``len`` is applied per element through ``_na_map`` with an integer
    result dtype requested.

    Returns
    -------
    lengths : array
    """
    lengths = _na_map(len, arr, dtype=int)
    return lengths


def str_findall(arr, pat, flags=0):
"""
Find all occurrences of pattern or regular expression
Expand Down Expand Up @@ -884,14 +844,16 @@ def str_encode(arr, encoding, errors="strict"):
return _na_map(f, arr)


def _noarg_wrapper(f):
def _noarg_wrapper(f, docstring=None, **kargs):
def wrapper(self):
result = f(self.series)
result = _na_map(f, self.series, **kargs)
return self._wrap_result(result)

wrapper.__name__ = f.__name__
if f.__doc__:
wrapper.__doc__ = f.__doc__
if docstring is not None:
wrapper.__doc__ = docstring
else:
raise ValueError('Provide docstring')

return wrapper

Expand Down Expand Up @@ -1076,7 +1038,47 @@ def get_dummies(self, sep='|'):
findall = _pat_wrapper(str_findall, flags=True)
extract = _pat_wrapper(str_extract, flags=True)

len = _noarg_wrapper(str_len)
lower = _noarg_wrapper(str_lower)
upper = _noarg_wrapper(str_upper)
title = _noarg_wrapper(str_title)
# Docstring for the ``len`` accessor, stored in ``_shared_docs`` so it can be
# handed to ``_noarg_wrapper`` explicitly (the wrapper requires a docstring).
_shared_docs['len'] = ("""
Compute length of each string in array.

Returns
-------
lengths : array
""")
# ``dtype=int`` travels through ``_noarg_wrapper``'s ``**kargs`` to ``_na_map``.
# NOTE(review): the ``len`` passed as the argument is presumably the builtin,
# since this name is only rebound after the call -- confirm against the class body.
len = _noarg_wrapper(len, docstring=_shared_docs['len'], dtype=int)

# Shared docstring template for the case-conversion accessors; ``%s`` is
# replaced with the target case name. The return entry uses a neutral name
# because the same template documents lower, upper and title alike.
_shared_docs['casemethods'] = ("""
Convert strings in array to %s

Returns
-------
converted : array
""")
# Each accessor maps the matching ``str`` method over the series through
# ``_noarg_wrapper`` and receives its own filled-in copy of the template.
lower = _noarg_wrapper(lambda x: x.lower(),
                       docstring=_shared_docs['casemethods'] % 'lowercase')
upper = _noarg_wrapper(lambda x: x.upper(),
                       docstring=_shared_docs['casemethods'] % 'uppercase')
title = _noarg_wrapper(lambda x: x.title(),
                       docstring=_shared_docs['casemethods'] % 'titlecase')

# Shared docstring template for the ``is*`` predicate accessors; ``%s`` is
# replaced with the character class that the individual method checks for.
_shared_docs['ismethods'] = ("""
Check whether all characters in each string in the array are %s

Returns
-------
Series of boolean values
""")
# Each accessor maps the matching ``str.is*`` method over the series through
# ``_noarg_wrapper`` and receives its own filled-in copy of the template.
isalnum = _noarg_wrapper(lambda x: x.isalnum(),
docstring=_shared_docs['ismethods'] % 'alphanumeric')
isalpha = _noarg_wrapper(lambda x: x.isalpha(),
docstring=_shared_docs['ismethods'] % 'alphabetic')
isdigit = _noarg_wrapper(lambda x: x.isdigit(),
docstring=_shared_docs['ismethods'] % 'digits')
isspace = _noarg_wrapper(lambda x: x.isspace(),
docstring=_shared_docs['ismethods'] % 'whitespace')
islower = _noarg_wrapper(lambda x: x.islower(),
docstring=_shared_docs['ismethods'] % 'lowercase')
isupper = _noarg_wrapper(lambda x: x.isupper(),
docstring=_shared_docs['ismethods'] % 'uppercase')
istitle = _noarg_wrapper(lambda x: x.istitle(),
docstring=_shared_docs['ismethods'] % 'titlecase')
35 changes: 35 additions & 0 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,41 @@ def test_empty_str_methods(self):
tm.assert_series_equal(empty_str, empty.str.get(0))
tm.assert_series_equal(empty_str, empty_bytes.str.decode('ascii'))
tm.assert_series_equal(empty_bytes, empty.str.encode('ascii'))
tm.assert_series_equal(empty_str, empty.str.isalnum())
tm.assert_series_equal(empty_str, empty.str.isalpha())
tm.assert_series_equal(empty_str, empty.str.isdigit())
tm.assert_series_equal(empty_str, empty.str.isspace())
tm.assert_series_equal(empty_str, empty.str.islower())
tm.assert_series_equal(empty_str, empty.str.isupper())
tm.assert_series_equal(empty_str, empty.str.istitle())

def test_ismethods(self):
    """Check each ``Series.str.is*`` accessor against hand-written
    expectations and against the builtin ``str`` method of the same name."""
    values = ['A', 'b', 'Xy', '4', '3A', '', 'TT', '55', '-', ' ']
    str_s = Series(values)
    alnum_e = [True, True, True, True, True, False, True, True, False, False]
    alpha_e = [True, True, True, False, False, False, True, False, False, False]
    digit_e = [False, False, False, True, False, False, False, True, False, False]
    space_e = [False, False, False, False, False, False, False, False, False, True]
    lower_e = [False, True, False, False, False, False, False, False, False, False]
    upper_e = [True, False, False, False, True, False, True, False, False, False]
    title_e = [True, False, True, False, True, False, False, False, False, False]

    # Hand-written expectations pin the result values and the Series type.
    tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e))
    tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e))
    tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e))
    tm.assert_series_equal(str_s.str.isspace(), Series(space_e))
    tm.assert_series_equal(str_s.str.islower(), Series(lower_e))
    tm.assert_series_equal(str_s.str.isupper(), Series(upper_e))
    tm.assert_series_equal(str_s.str.istitle(), Series(title_e))

    # Cross-check against the builtin ``str`` methods. ``assertEqual`` is
    # used because the ``assertEquals`` alias is deprecated in unittest.
    self.assertEqual(str_s.str.isalnum().tolist(), [v.isalnum() for v in values])
    self.assertEqual(str_s.str.isalpha().tolist(), [v.isalpha() for v in values])
    self.assertEqual(str_s.str.isdigit().tolist(), [v.isdigit() for v in values])
    self.assertEqual(str_s.str.isspace().tolist(), [v.isspace() for v in values])
    self.assertEqual(str_s.str.islower().tolist(), [v.islower() for v in values])
    self.assertEqual(str_s.str.isupper().tolist(), [v.isupper() for v in values])
    self.assertEqual(str_s.str.istitle().tolist(), [v.istitle() for v in values])

def test_get_dummies(self):
s = Series(['a|b', 'a|c', np.nan])
Expand Down