Skip to content

BUG: Fix Series.str has no method "isascii()" #59096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,9 @@ def _str_isalpha(self):
result = pc.utf8_is_alpha(self._pa_array)
return self._result_converter(result)

def _str_isascii(self):
    """
    Check, element-wise, whether every character of each string is ASCII.

    Runs pyarrow's ``string_is_ascii`` compute kernel on the backing
    ``self._pa_array`` and passes the output through
    ``self._result_converter``, the same pattern the neighbouring
    ``_str_isalpha`` / ``_str_isdecimal`` methods use, instead of
    delegating to the parent class implementation.
    """
    # NOTE(review): string_is_ascii is expected to yield True for empty
    # strings (like ``"".isascii()``) and null for null inputs -- confirm
    # against the pyarrow compute documentation.
    result = pc.string_is_ascii(self._pa_array)
    return self._result_converter(result)

def _str_isdecimal(self):
    """
    Check, element-wise, whether each string is made up of decimal characters.

    Applies pyarrow's ``utf8_is_decimal`` kernel to ``self._pa_array`` and
    hands the kernel output to ``self._result_converter``.
    """
    return self._result_converter(pc.utf8_is_decimal(self._pa_array))
Expand Down
15 changes: 14 additions & 1 deletion pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3320,7 +3320,8 @@ def casefold(self):

This is equivalent to running the Python string method
:meth:`str.%(method)s` for each element of the Series/Index. If a string
has zero characters, ``False`` is returned for that check.
has zero characters, ``False`` is returned for that check, except for
``isascii``, which returns ``True`` for an empty string.

Returns
-------
Expand All @@ -3333,6 +3334,7 @@ def casefold(self):
Series.str.isalpha : Check whether all characters are alphabetic.
Series.str.isnumeric : Check whether all characters are numeric.
Series.str.isalnum : Check whether all characters are alphanumeric.
Series.str.isascii : Check whether all characters are ASCII characters.
Series.str.isdigit : Check whether all characters are digits.
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.isspace : Check whether all characters are whitespace.
Expand Down Expand Up @@ -3367,6 +3369,13 @@ def casefold(self):
3 False
dtype: bool

>>> s1.str.isascii()
0 True
1 True
2 True
3 True
dtype: bool

Note that checks against characters mixed with any additional punctuation
or whitespace will evaluate to false for an alphanumeric check.

Expand Down Expand Up @@ -3457,6 +3466,7 @@ def casefold(self):
"""
# Per-method substitution values, keyed by "type" and "method"; each dict is
# %-formatted into the shared "ismethods" docstring template when the
# corresponding accessor method is defined.
_doc_args["isalnum"] = {"type": "alphanumeric", "method": "isalnum"}
_doc_args["isalpha"] = {"type": "alphabetic", "method": "isalpha"}
_doc_args["isascii"] = {"type": "ASCII characters", "method": "isascii"}
_doc_args["isdigit"] = {"type": "digits", "method": "isdigit"}
_doc_args["isspace"] = {"type": "whitespace", "method": "isspace"}
_doc_args["islower"] = {"type": "lowercase", "method": "islower"}
Expand All @@ -3472,6 +3482,9 @@ def casefold(self):
# Each accessor method is produced by _map_and_wrap from the method name and
# the shared "ismethods" docstring filled in with that method's _doc_args.
isalpha = _map_and_wrap(
"isalpha", docstring=_shared_docs["ismethods"] % _doc_args["isalpha"]
)
isascii = _map_and_wrap(
"isascii", docstring=_shared_docs["ismethods"] % _doc_args["isascii"]
)
isdigit = _map_and_wrap(
"isdigit", docstring=_shared_docs["ismethods"] % _doc_args["isdigit"]
)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/strings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ def _str_isalnum(self):
def _str_isalpha(self):
pass

@abc.abstractmethod
def _str_isascii(self):
    """Abstract hook for the element-wise ``isascii`` check; subclasses must implement it."""
    pass

@abc.abstractmethod
def _str_isdecimal(self):
    """Abstract hook for the element-wise ``isdecimal`` check; subclasses must implement it."""
    pass
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/strings/object_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,9 @@ def _str_isalnum(self):
def _str_isalpha(self):
return self._str_map(str.isalpha, dtype="bool")

def _str_isascii(self):
return self._str_map(str.isascii, dtype="bool")

def _str_isdecimal(self):
return self._str_map(str.isdecimal, dtype="bool")

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/strings/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
"get_dummies",
"isalnum",
"isalpha",
"isascii",
"isdecimal",
"isdigit",
"islower",
Expand Down Expand Up @@ -97,7 +98,6 @@
)
# Entries of _any_string_method are 3-tuples; only the first element of each
# (the method name) is kept here.
ids, _, _ = zip(*_any_string_method) # use method name as fixture-id
# Public attributes of StringMethods that have no entry in the list above.
missing_methods = {f for f in dir(StringMethods) if not f.startswith("_")} - set(ids)

# test that the above list captures all methods of StringMethods
assert not missing_methods

Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/strings/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,19 @@ def test_ismethods(method, expected, any_string_dtype):
assert list(result) == expected


def test_isascii(any_string_dtype):
    """``Series.str.isascii`` flags non-ASCII strings and treats ``""`` as ASCII."""
    values = ["a", "bb", "123", "あ", "\n", "", " ", "¼"]
    flags = [True, True, True, False, True, True, True, False]

    # The expected boolean dtype depends on which string dtype is under test.
    if any_string_dtype in object_pyarrow_numpy:
        expected_dtype = "bool"
    else:
        expected_dtype = "boolean"

    ser = Series(values, dtype=any_string_dtype)
    expected = Series(flags, dtype=expected_dtype)
    tm.assert_series_equal(ser.str.isascii(), expected)


@pytest.mark.parametrize(
"method, expected",
[
Expand Down
Loading