ENH: StringMethods supports is_xxx methods

sinhrks · sinhrks · commit bc63677ced9f · 2015-01-26T22:56:12.000+09:00
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -549,6 +549,13 @@ strings and apply several methods to it. These can be acccessed like
    Series.str.strip
    Series.str.title
    Series.str.upper
+   Series.str.isalnum
+   Series.str.isalpha
+   Series.str.isdigit
+   Series.str.isspace
+   Series.str.islower
+   Series.str.isupper
+   Series.str.istitle
    Series.str.get_dummies
 
 .. _api.categorical:
diff --git a/doc/source/text.rst b/doc/source/text.rst
@@ -228,3 +228,10 @@ Method Summary
     :meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip``
     :meth:`~Series.str.lower`,Equivalent to ``str.lower``
     :meth:`~Series.str.upper`,Equivalent to ``str.upper``
+    :meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``
+    :meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha``
+    :meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit``
+    :meth:`~Series.str.isspace`,Equivalent to ``str.isspace``
+    :meth:`~Series.str.islower`,Equivalent to ``str.islower``
+    :meth:`~Series.str.isupper`,Equivalent to ``str.isupper``
+    :meth:`~Series.str.istitle`,Equivalent to ``str.istitle``
diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
@@ -104,11 +104,12 @@ Enhancements
 - Added ``Series.str.slice_replace()``, which previously raised NotImplementedError (:issue:`8888`)
 - Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`)
 - ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`)
-
 - ``Timedelta`` will now accept nanoseconds keyword in constructor (:issue:`9273`)
 - SQL code now safely escapes table and column names (:issue:`8986`)
 
 - Added auto-complete for ``Series.str.<tab>``, ``Series.dt.<tab>`` and ``Series.cat.<tab>`` (:issue:`9322`)
+- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``,
+  ``isupper()``, ``istitle()`` which behave the same as the standard ``str`` methods (:issue:`9282`)
 
 Performance
 ~~~~~~~~~~~
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -9,6 +9,9 @@
 import textwrap
 
 
+_shared_docs = dict()
+
+
 def _get_array_list(arr, others):
     from pandas.core.series import Series
 
@@ -124,17 +127,6 @@ def g(x):
         return lib.map_infer(arr, f)
 
 
-def str_title(arr):
-    """
-    Convert strings to titlecased version
-
-    Returns
-    -------
-    titled : array
-    """
-    return _na_map(lambda x: x.title(), arr)
-
-
 def str_count(arr, pat, flags=0):
     """
     Count occurrences of pattern in each string
@@ -197,7 +189,8 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
         else:
             upper_pat = pat.upper()
             f = lambda x: upper_pat in x
-            return _na_map(f, str_upper(arr), na, dtype=bool)
+            uppered = _na_map(lambda x: x.upper(), arr)
+            return _na_map(f, uppered, na, dtype=bool)
     return _na_map(f, arr, na, dtype=bool)
 
 
@@ -239,28 +232,6 @@ def str_endswith(arr, pat, na=np.nan):
     return _na_map(f, arr, na, dtype=bool)
 
 
-def str_lower(arr):
-    """
-    Convert strings in array to lowercase
-
-    Returns
-    -------
-    lowercase : array
-    """
-    return _na_map(lambda x: x.lower(), arr)
-
-
-def str_upper(arr):
-    """
-    Convert strings in array to uppercase
-
-    Returns
-    -------
-    uppercase : array
-    """
-    return _na_map(lambda x: x.upper(), arr)
-
-
 def str_replace(arr, pat, repl, n=-1, case=True, flags=0):
     """
     Replace
@@ -553,17 +524,6 @@ def str_join(arr, sep):
     return _na_map(sep.join, arr)
 
 
-def str_len(arr):
-    """
-    Compute length of each string in array.
-
-    Returns
-    -------
-    lengths : array
-    """
-    return _na_map(len, arr, dtype=int)
-
-
 def str_findall(arr, pat, flags=0):
     """
     Find all occurrences of pattern or regular expression
@@ -884,14 +844,16 @@ def str_encode(arr, encoding, errors="strict"):
     return _na_map(f, arr)
 
 
-def _noarg_wrapper(f):
+def _noarg_wrapper(f, docstring=None, **kargs):
     def wrapper(self):
-        result = f(self.series)
+        result = _na_map(f, self.series, **kargs)
         return self._wrap_result(result)
 
     wrapper.__name__ = f.__name__
-    if f.__doc__:
-        wrapper.__doc__ = f.__doc__
+    if docstring is not None:
+        wrapper.__doc__ = docstring
+    else:
+        raise ValueError('Provide docstring')
 
     return wrapper
 
@@ -1076,7 +1038,47 @@ def get_dummies(self, sep='|'):
     findall = _pat_wrapper(str_findall, flags=True)
     extract = _pat_wrapper(str_extract, flags=True)
 
-    len = _noarg_wrapper(str_len)
-    lower = _noarg_wrapper(str_lower)
-    upper = _noarg_wrapper(str_upper)
-    title = _noarg_wrapper(str_title)
+    _shared_docs['len'] = ("""
+    Compute length of each string in array.
+
+    Returns
+    -------
+    lengths : array
+    """)
+    len = _noarg_wrapper(len, docstring=_shared_docs['len'], dtype=int)
+
+    _shared_docs['casemethods'] = ("""
+    Convert strings in array to %s
+
+    Returns
+    -------
+    converted : array
+    """)
+    lower = _noarg_wrapper(lambda x: x.lower(),
+                           docstring=_shared_docs['casemethods'] % 'lowercase')
+    upper = _noarg_wrapper(lambda x: x.upper(),
+                           docstring=_shared_docs['casemethods'] % 'uppercase')
+    title = _noarg_wrapper(lambda x: x.title(),
+                           docstring=_shared_docs['casemethods'] % 'titlecase')
+
+    _shared_docs['ismethods'] = ("""
+    Check whether all characters in each string in the array are %s
+
+    Returns
+    -------
+    Series of boolean values
+    """)
+    isalnum = _noarg_wrapper(lambda x: x.isalnum(),
+                             docstring=_shared_docs['ismethods'] % 'alphanumeric')
+    isalpha = _noarg_wrapper(lambda x: x.isalpha(),
+                             docstring=_shared_docs['ismethods'] % 'alphabetic')
+    isdigit = _noarg_wrapper(lambda x: x.isdigit(),
+                             docstring=_shared_docs['ismethods'] % 'digits')
+    isspace = _noarg_wrapper(lambda x: x.isspace(),
+                             docstring=_shared_docs['ismethods'] % 'whitespace')
+    islower = _noarg_wrapper(lambda x: x.islower(),
+                             docstring=_shared_docs['ismethods'] % 'lowercase')
+    isupper = _noarg_wrapper(lambda x: x.isupper(),
+                             docstring=_shared_docs['ismethods'] % 'uppercase')
+    istitle = _noarg_wrapper(lambda x: x.istitle(),
+                             docstring=_shared_docs['ismethods'] % 'titlecase')
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
@@ -623,6 +623,41 @@ def test_empty_str_methods(self):
         tm.assert_series_equal(empty_str, empty.str.get(0))
         tm.assert_series_equal(empty_str, empty_bytes.str.decode('ascii'))
         tm.assert_series_equal(empty_bytes, empty.str.encode('ascii'))
+        tm.assert_series_equal(empty_str, empty.str.isalnum())
+        tm.assert_series_equal(empty_str, empty.str.isalpha())
+        tm.assert_series_equal(empty_str, empty.str.isdigit())
+        tm.assert_series_equal(empty_str, empty.str.isspace())
+        tm.assert_series_equal(empty_str, empty.str.islower())
+        tm.assert_series_equal(empty_str, empty.str.isupper())
+        tm.assert_series_equal(empty_str, empty.str.istitle())
+
+    def test_ismethods(self):
+        values = ['A', 'b', 'Xy', '4', '3A', '', 'TT', '55', '-', '  ']
+        str_s = Series(values)
+        alnum_e = [True, True, True, True, True, False, True, True, False, False]
+        alpha_e = [True, True, True, False, False, False, True, False, False, False]
+        digit_e = [False, False, False, True, False, False, False, True, False, False]
+        num_e = [False, False, False, True, False, False, False, True, False, False]
+        space_e = [False, False, False, False, False, False, False, False, False, True]
+        lower_e = [False, True, False, False, False, False, False, False, False, False]
+        upper_e = [True, False, False, False, True, False, True, False, False, False]
+        title_e = [True, False, True, False, True, False, False, False, False, False]
+
+        tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e))
+        tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e))
+        tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e))
+        tm.assert_series_equal(str_s.str.isspace(), Series(space_e))
+        tm.assert_series_equal(str_s.str.islower(), Series(lower_e))
+        tm.assert_series_equal(str_s.str.isupper(), Series(upper_e))
+        tm.assert_series_equal(str_s.str.istitle(), Series(title_e))
+
+        self.assertEquals(str_s.str.isalnum().tolist(), [v.isalnum() for v in values])
+        self.assertEquals(str_s.str.isalpha().tolist(), [v.isalpha() for v in values])
+        self.assertEquals(str_s.str.isdigit().tolist(), [v.isdigit() for v in values])
+        self.assertEquals(str_s.str.isspace().tolist(), [v.isspace() for v in values])
+        self.assertEquals(str_s.str.islower().tolist(), [v.islower() for v in values])
+        self.assertEquals(str_s.str.isupper().tolist(), [v.isupper() for v in values])
+        self.assertEquals(str_s.str.istitle().tolist(), [v.istitle() for v in values])
 
     def test_get_dummies(self):
         s = Series(['a|b', 'a|c', np.nan])