Skip to content

Commit bc63677

Browse files
committed
ENH: StringMethods supports is_xxx methods
1 parent 224a66d commit bc63677

File tree

5 files changed

+106
-54
lines changed

5 files changed

+106
-54
lines changed

doc/source/api.rst

+7
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,13 @@ strings and apply several methods to it. These can be accessed like
549549
Series.str.strip
550550
Series.str.title
551551
Series.str.upper
552+
Series.str.isalnum
553+
Series.str.isalpha
554+
Series.str.isdigit
555+
Series.str.isspace
556+
Series.str.islower
557+
Series.str.isupper
558+
Series.str.istitle
552559
Series.str.get_dummies
553560

554561
.. _api.categorical:

doc/source/text.rst

+7
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,10 @@ Method Summary
228228
:meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip``
229229
:meth:`~Series.str.lower`,Equivalent to ``str.lower``
230230
:meth:`~Series.str.upper`,Equivalent to ``str.upper``
231+
:meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``
232+
:meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha``
233+
:meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit``
234+
:meth:`~Series.str.isspace`,Equivalent to ``str.isspace``
235+
:meth:`~Series.str.islower`,Equivalent to ``str.islower``
236+
:meth:`~Series.str.isupper`,Equivalent to ``str.isupper``
237+
:meth:`~Series.str.istitle`,Equivalent to ``str.istitle``

doc/source/whatsnew/v0.16.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,12 @@ Enhancements
104104
- Added ``Series.str.slice_replace()``, which previously raised NotImplementedError (:issue:`8888`)
105105
- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`)
106106
- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`)
107-
108107
- ``Timedelta`` will now accept nanoseconds keyword in constructor (:issue:`9273`)
109108
- SQL code now safely escapes table and column names (:issue:`8986`)
110109

111110
- Added auto-complete for ``Series.str.<tab>``, ``Series.dt.<tab>`` and ``Series.cat.<tab>`` (:issue:`9322`)
111+
- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``,
112+
``isupper()``, ``istitle()`` which behave the same as the standard ``str`` methods (:issue:`9282`)
112113

113114
Performance
114115
~~~~~~~~~~~

pandas/core/strings.py

+55-53
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
import textwrap
1010

1111

12+
_shared_docs = dict()
13+
14+
1215
def _get_array_list(arr, others):
1316
from pandas.core.series import Series
1417

@@ -124,17 +127,6 @@ def g(x):
124127
return lib.map_infer(arr, f)
125128

126129

127-
def str_title(arr):
128-
"""
129-
Convert strings to titlecased version
130-
131-
Returns
132-
-------
133-
titled : array
134-
"""
135-
return _na_map(lambda x: x.title(), arr)
136-
137-
138130
def str_count(arr, pat, flags=0):
139131
"""
140132
Count occurrences of pattern in each string
@@ -197,7 +189,8 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
197189
else:
198190
upper_pat = pat.upper()
199191
f = lambda x: upper_pat in x
200-
return _na_map(f, str_upper(arr), na, dtype=bool)
192+
uppered = _na_map(lambda x: x.upper(), arr)
193+
return _na_map(f, uppered, na, dtype=bool)
201194
return _na_map(f, arr, na, dtype=bool)
202195

203196

@@ -239,28 +232,6 @@ def str_endswith(arr, pat, na=np.nan):
239232
return _na_map(f, arr, na, dtype=bool)
240233

241234

242-
def str_lower(arr):
243-
"""
244-
Convert strings in array to lowercase
245-
246-
Returns
247-
-------
248-
lowercase : array
249-
"""
250-
return _na_map(lambda x: x.lower(), arr)
251-
252-
253-
def str_upper(arr):
254-
"""
255-
Convert strings in array to uppercase
256-
257-
Returns
258-
-------
259-
uppercase : array
260-
"""
261-
return _na_map(lambda x: x.upper(), arr)
262-
263-
264235
def str_replace(arr, pat, repl, n=-1, case=True, flags=0):
265236
"""
266237
Replace
@@ -553,17 +524,6 @@ def str_join(arr, sep):
553524
return _na_map(sep.join, arr)
554525

555526

556-
def str_len(arr):
557-
"""
558-
Compute length of each string in array.
559-
560-
Returns
561-
-------
562-
lengths : array
563-
"""
564-
return _na_map(len, arr, dtype=int)
565-
566-
567527
def str_findall(arr, pat, flags=0):
568528
"""
569529
Find all occurrences of pattern or regular expression
@@ -884,14 +844,16 @@ def str_encode(arr, encoding, errors="strict"):
884844
return _na_map(f, arr)
885845

886846

887-
def _noarg_wrapper(f):
847+
def _noarg_wrapper(f, docstring=None, **kargs):
888848
def wrapper(self):
889-
result = f(self.series)
849+
result = _na_map(f, self.series, **kargs)
890850
return self._wrap_result(result)
891851

892852
wrapper.__name__ = f.__name__
893-
if f.__doc__:
894-
wrapper.__doc__ = f.__doc__
853+
if docstring is not None:
854+
wrapper.__doc__ = docstring
855+
else:
856+
raise ValueError('Provide docstring')
895857

896858
return wrapper
897859

@@ -1076,7 +1038,47 @@ def get_dummies(self, sep='|'):
10761038
findall = _pat_wrapper(str_findall, flags=True)
10771039
extract = _pat_wrapper(str_extract, flags=True)
10781040

1079-
len = _noarg_wrapper(str_len)
1080-
lower = _noarg_wrapper(str_lower)
1081-
upper = _noarg_wrapper(str_upper)
1082-
title = _noarg_wrapper(str_title)
1041+
_shared_docs['len'] = ("""
1042+
Compute length of each string in array.
1043+
1044+
Returns
1045+
-------
1046+
lengths : array
1047+
""")
1048+
len = _noarg_wrapper(len, docstring=_shared_docs['len'], dtype=int)
1049+
1050+
_shared_docs['casemethods'] = ("""
1051+
Convert strings in array to %s
1052+
1053+
Returns
1054+
-------
1055+
converted : array
1056+
""")
1057+
lower = _noarg_wrapper(lambda x: x.lower(),
1058+
docstring=_shared_docs['casemethods'] % 'lowercase')
1059+
upper = _noarg_wrapper(lambda x: x.upper(),
1060+
docstring=_shared_docs['casemethods'] % 'uppercase')
1061+
title = _noarg_wrapper(lambda x: x.title(),
1062+
docstring=_shared_docs['casemethods'] % 'titlecase')
1063+
1064+
_shared_docs['ismethods'] = ("""
1065+
Check whether all characters in each string in the array are %s
1066+
1067+
Returns
1068+
-------
1069+
Series of boolean values
1070+
""")
1071+
isalnum = _noarg_wrapper(lambda x: x.isalnum(),
1072+
docstring=_shared_docs['ismethods'] % 'alphanumeric')
1073+
isalpha = _noarg_wrapper(lambda x: x.isalpha(),
1074+
docstring=_shared_docs['ismethods'] % 'alphabetic')
1075+
isdigit = _noarg_wrapper(lambda x: x.isdigit(),
1076+
docstring=_shared_docs['ismethods'] % 'digits')
1077+
isspace = _noarg_wrapper(lambda x: x.isspace(),
1078+
docstring=_shared_docs['ismethods'] % 'whitespace')
1079+
islower = _noarg_wrapper(lambda x: x.islower(),
1080+
docstring=_shared_docs['ismethods'] % 'lowercase')
1081+
isupper = _noarg_wrapper(lambda x: x.isupper(),
1082+
docstring=_shared_docs['ismethods'] % 'uppercase')
1083+
istitle = _noarg_wrapper(lambda x: x.istitle(),
1084+
docstring=_shared_docs['ismethods'] % 'titlecase')

pandas/tests/test_strings.py

+35
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,41 @@ def test_empty_str_methods(self):
623623
tm.assert_series_equal(empty_str, empty.str.get(0))
624624
tm.assert_series_equal(empty_str, empty_bytes.str.decode('ascii'))
625625
tm.assert_series_equal(empty_bytes, empty.str.encode('ascii'))
626+
tm.assert_series_equal(empty_str, empty.str.isalnum())
627+
tm.assert_series_equal(empty_str, empty.str.isalpha())
628+
tm.assert_series_equal(empty_str, empty.str.isdigit())
629+
tm.assert_series_equal(empty_str, empty.str.isspace())
630+
tm.assert_series_equal(empty_str, empty.str.islower())
631+
tm.assert_series_equal(empty_str, empty.str.isupper())
632+
tm.assert_series_equal(empty_str, empty.str.istitle())
633+
634+
def test_ismethods(self):
635+
values = ['A', 'b', 'Xy', '4', '3A', '', 'TT', '55', '-', ' ']
636+
str_s = Series(values)
637+
alnum_e = [True, True, True, True, True, False, True, True, False, False]
638+
alpha_e = [True, True, True, False, False, False, True, False, False, False]
639+
digit_e = [False, False, False, True, False, False, False, True, False, False]
640+
num_e = [False, False, False, True, False, False, False, True, False, False]
641+
space_e = [False, False, False, False, False, False, False, False, False, True]
642+
lower_e = [False, True, False, False, False, False, False, False, False, False]
643+
upper_e = [True, False, False, False, True, False, True, False, False, False]
644+
title_e = [True, False, True, False, True, False, False, False, False, False]
645+
646+
tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e))
647+
tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e))
648+
tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e))
649+
tm.assert_series_equal(str_s.str.isspace(), Series(space_e))
650+
tm.assert_series_equal(str_s.str.islower(), Series(lower_e))
651+
tm.assert_series_equal(str_s.str.isupper(), Series(upper_e))
652+
tm.assert_series_equal(str_s.str.istitle(), Series(title_e))
653+
654+
self.assertEquals(str_s.str.isalnum().tolist(), [v.isalnum() for v in values])
655+
self.assertEquals(str_s.str.isalpha().tolist(), [v.isalpha() for v in values])
656+
self.assertEquals(str_s.str.isdigit().tolist(), [v.isdigit() for v in values])
657+
self.assertEquals(str_s.str.isspace().tolist(), [v.isspace() for v in values])
658+
self.assertEquals(str_s.str.islower().tolist(), [v.islower() for v in values])
659+
self.assertEquals(str_s.str.isupper().tolist(), [v.isupper() for v in values])
660+
self.assertEquals(str_s.str.istitle().tolist(), [v.istitle() for v in values])
626661

627662
def test_get_dummies(self):
628663
s = Series(['a|b', 'a|c', np.nan])

0 commit comments

Comments
 (0)