From 916d1831cd8be916dfaf55f89b413f8f842341c2 Mon Sep 17 00:00:00 2001 From: cmusselle Date: Tue, 1 May 2018 23:16:44 +0100 Subject: [PATCH 1/3] Enhance docstrings for Series.str.ismethods --- pandas/core/strings.py | 130 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index c6d45ce5413ac..3625942cc6d73 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2401,11 +2401,137 @@ def rindex(self, sub, start=0, end=None): _shared_docs['ismethods'] = (""" Check whether all characters in each string in the Series/Index - are %(type)s. Equivalent to :meth:`str.%(method)s`. + are %(type)s. + + This is equivalent to running the Python string method + :meth:`str.%(method)s` for each element of the Series/Index. If a string + has zero characters, `False` is returned for that check. Returns ------- - is : Series/array of boolean values + Series + Series of boolean values with the same length as the original + Series/Index. + + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.isupper : Check whether all characters are uppercase. + Series.str.istitle : Check whether all characters are titlecase. 
+ + Examples + -------- + **Checks for Alphabetic and Numeric Characters** + + >>> s1 = pd.Series(['AB', 'C12', '42', '']) + + >>> # All are alphabetic characters + >>> s1.str.isalpha() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + >>> # All are numeric characters + >>> s1.str.isnumeric() + 0 False + 1 False + 2 True + 3 False + dtype: bool + + >>> # All are either alphabetic characters or numeric characters + >>> s1.str.isalnum() + 0 True + 1 True + 2 True + 3 False + dtype: bool + + Note that checks against characters mixed with any additional punctuation + or whitespace will evaluate to false for an alphanumeric check. + + >>> s2 = pd.Series(['A B', '1.5', '3,000']) + >>> s2.str.isalnum() + 0 False + 1 False + 2 False + dtype: bool + + **More Detailed Checks for Numeric Characters** + + >>> s3 = pd.Series(['23', '³', '⅕', '']) + + >>> # All are characters used to form numbers in base 10 + >>> s3.str.isdecimal() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + >>> # Same as s.str.isdecimal, but also includes special + >>> # digits, like superscripted/subscripted digits + >>> s3.str.isdigit() + 0 True + 1 True + 2 False + 3 False + dtype: bool + + >>> # Same as s.str.isdigit, but also includes other characters + >>> # that can represent quantities such as unicode fractions + >>> s3.str.isnumeric() + 0 True + 1 True + 2 True + 3 False + dtype: bool + + **Checks for Whitespace** + + >>> # All characters represent whitespace + >>> s4 = pd.Series([' ','\\t\\r\\n ', '']) + >>> s4.str.isspace() + 0 True + 1 True + 2 False + dtype: bool + + **Checks for Character Case** + + >>> s5 = pd.Series(['leopard', 'Golden Eagal', 'SNAKE', '']) + + >>> # All characters are lowercase + >>> s5.str.islower() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + >>> # All characters are uppercase + >>> s5.str.isupper() + 0 False + 1 False + 2 True + 3 False + dtype: bool + + >>> # All words are in title case (first letter of each word capitalized) + >>> 
s5.str.istitle() + 0 False + 1 True + 2 False + 3 False + dtype: bool """) _shared_docs['isalnum'] = dict(type='alphanumeric', method='isalnum') _shared_docs['isalpha'] = dict(type='alphabetic', method='isalpha') From e03a463989d688f4ce7b0e94c3121a3d4e645395 Mon Sep 17 00:00:00 2001 From: cmusselle Date: Tue, 8 May 2018 20:21:36 +0100 Subject: [PATCH 2/3] Specify file encoding to fix python 2.7 tests/checks --- pandas/core/strings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 3625942cc6d73..d6bb03cf5c535 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import numpy as np from pandas.compat import zip From 7f066c96e210684cc90354d89523ad500bfc7fc2 Mon Sep 17 00:00:00 2001 From: cmusselle Date: Tue, 8 May 2018 20:24:17 +0100 Subject: [PATCH 3/3] Ammend docstring following feedback - Made docstring header fit on one line - Expanded return value dtype - Switched explanations in comments to text - Simplified examples - Fixed typos --- pandas/core/strings.py | 46 ++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index d6bb03cf5c535..4af5b0ee5dd98 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2401,17 +2401,16 @@ def rindex(self, sub, start=0, end=None): _shared_docs['swapcase']) _shared_docs['ismethods'] = (""" - Check whether all characters in each string in the Series/Index - are %(type)s. + Check whether all characters in each string are %(type)s. This is equivalent to running the Python string method :meth:`str.%(method)s` for each element of the Series/Index. If a string - has zero characters, `False` is returned for that check. + has zero characters, ``False`` is returned for that check. 
Returns ------- - Series - Series of boolean values with the same length as the original + Series or Index of bool + Series or Index of boolean values with the same length as the original Series/Index. See Also @@ -2430,9 +2429,8 @@ def rindex(self, sub, start=0, end=None): -------- **Checks for Alphabetic and Numeric Characters** - >>> s1 = pd.Series(['AB', 'C12', '42', '']) + >>> s1 = pd.Series(['one', 'one1', '1', '']) - >>> # All are alphabetic characters >>> s1.str.isalpha() 0 True 1 False @@ -2440,7 +2438,6 @@ def rindex(self, sub, start=0, end=None): 3 False dtype: bool - >>> # All are numeric characters >>> s1.str.isnumeric() 0 False 1 False @@ -2448,7 +2445,6 @@ def rindex(self, sub, start=0, end=None): 3 False dtype: bool - >>> # All are either alphabetic characters or numeric characters >>> s1.str.isalnum() 0 True 1 True @@ -2468,9 +2464,14 @@ def rindex(self, sub, start=0, end=None): **More Detailed Checks for Numeric Characters** + There are several different but overlapping sets of numeric characters that + can be checked for. + >>> s3 = pd.Series(['23', '³', '⅕', '']) - >>> # All are characters used to form numbers in base 10 + The ``s3.str.isdecimal`` method checks for characters used to form numbers + in base 10. + >>> s3.str.isdecimal() 0 True 1 False @@ -2478,8 +2479,10 @@ def rindex(self, sub, start=0, end=None): 3 False dtype: bool - >>> # Same as s.str.isdecimal, but also includes special - >>> # digits, like superscripted/subscripted digits + The ``s3.str.isdigit`` method is the same as ``s3.str.isdecimal`` but also + includes special digits, like superscripted and subscripted digits in + unicode. 
+ >>> s3.str.isdigit() 0 True 1 True @@ -2487,8 +2490,10 @@ def rindex(self, sub, start=0, end=None): 3 False dtype: bool - >>> # Same as s.str.isdigit, but also includes other characters - >>> # that can represent quantities such as unicode fractions + The ``s3.str.isnumeric`` method is the same as ``s3.str.isdigit`` but also + includes other characters that can represent quantities such as unicode + fractions. + >>> s3.str.isnumeric() 0 True 1 True @@ -2498,8 +2503,7 @@ def rindex(self, sub, start=0, end=None): **Checks for Whitespace** - >>> # All characters represent whitespace - >>> s4 = pd.Series([' ','\\t\\r\\n ', '']) + >>> s4 = pd.Series([' ', '\\t\\r\\n ', '']) >>> s4.str.isspace() 0 True 1 True @@ -2508,9 +2512,8 @@ def rindex(self, sub, start=0, end=None): **Checks for Character Case** - >>> s5 = pd.Series(['leopard', 'Golden Eagal', 'SNAKE', '']) + >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) - >>> # All characters are lowercase >>> s5.str.islower() 0 True 1 False @@ -2518,7 +2521,6 @@ def rindex(self, sub, start=0, end=None): 3 False dtype: bool - >>> # All characters are uppercase >>> s5.str.isupper() 0 False 1 False @@ -2526,7 +2528,11 @@ def rindex(self, sub, start=0, end=None): 3 False dtype: bool - >>> # All words are in title case (first letter of each word capitalized) + The ``s5.str.istitle`` method checks whether all words are in title + case (whether only the first letter of each word is capitalized). Words are + assumed to be any sequence of non-numeric characters separated by + whitespace characters. + >>> s5.str.istitle() 0 False 1 True