diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index a6ac40b5203bf..b406893e3414a 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -409,6 +409,7 @@ strings and apply several methods to it. These can be accessed like :template: autosummary/accessor_method.rst Series.str.capitalize + Series.str.casefold Series.str.cat Series.str.center Series.str.contains diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index e4f60a761750d..6f21a7d9beb36 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -600,6 +600,7 @@ Method Summary :meth:`~Series.str.partition`;Equivalent to ``str.partition`` :meth:`~Series.str.rpartition`;Equivalent to ``str.rpartition`` :meth:`~Series.str.lower`;Equivalent to ``str.lower`` + :meth:`~Series.str.casefold`;Equivalent to ``str.casefold`` :meth:`~Series.str.upper`;Equivalent to ``str.upper`` :meth:`~Series.str.find`;Equivalent to ``str.find`` :meth:`~Series.str.rfind`;Equivalent to ``str.rfind`` diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 170e7f14da397..b94a18d863a41 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -22,6 +22,7 @@ Other Enhancements - Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`) - :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`) +- ``Series.str`` has gained the :meth:`Series.str.casefold` method to remove all case distinctions present in a string (:issue:`25405`) - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) - :meth:`DatetimeIndex.union` now supports the ``sort``
argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) - diff --git a/pandas/core/strings.py b/pandas/core/strings.py index cc7a4db515c42..9577b07360f65 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2926,7 +2926,7 @@ def rindex(self, sub, start=0, end=None): _shared_docs['casemethods'] = (""" Convert strings in the Series/Index to %(type)s. - + %(version)s Equivalent to :meth:`str.%(method)s`. Returns @@ -2943,6 +2943,7 @@ def rindex(self, sub, start=0, end=None): remaining to lowercase. Series.str.swapcase : Converts uppercase to lowercase and lowercase to uppercase. + Series.str.casefold: Removes all case distinctions in the string. Examples -------- @@ -2989,12 +2990,15 @@ def rindex(self, sub, start=0, end=None): 3 sWaPcAsE dtype: object """) - _shared_docs['lower'] = dict(type='lowercase', method='lower') - _shared_docs['upper'] = dict(type='uppercase', method='upper') - _shared_docs['title'] = dict(type='titlecase', method='title') + _shared_docs['lower'] = dict(type='lowercase', method='lower', version='') + _shared_docs['upper'] = dict(type='uppercase', method='upper', version='') + _shared_docs['title'] = dict(type='titlecase', method='title', version='') _shared_docs['capitalize'] = dict(type='be capitalized', - method='capitalize') - _shared_docs['swapcase'] = dict(type='be swapcased', method='swapcase') + method='capitalize', version='') + _shared_docs['swapcase'] = dict(type='be swapcased', method='swapcase', + version='') + _shared_docs['casefold'] = dict(type='be casefolded', method='casefold', + version='\n .. 
versionadded:: 0.25.0\n') lower = _noarg_wrapper(lambda x: x.lower(), docstring=_shared_docs['casemethods'] % _shared_docs['lower']) @@ -3010,6 +3014,9 @@ def rindex(self, sub, start=0, end=None): swapcase = _noarg_wrapper(lambda x: x.swapcase(), docstring=_shared_docs['casemethods'] % _shared_docs['swapcase']) + casefold = _noarg_wrapper(lambda x: x.casefold(), + docstring=_shared_docs['casemethods'] % + _shared_docs['casefold']) _shared_docs['ismethods'] = (""" Check whether all characters in each string are %(type)s. diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 7cea3be03d1a7..1ecfedc8685da 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -76,7 +76,7 @@ def assert_series_or_index_equal(left, right): 'len', 'lower', 'lstrip', 'partition', 'rpartition', 'rsplit', 'rstrip', 'slice', 'slice_replace', 'split', - 'strip', 'swapcase', 'title', 'upper' + 'strip', 'swapcase', 'title', 'upper', 'casefold' ], [()] * 100, [{}] * 100)) ids, _, _ = zip(*_any_string_method) # use method name as fixture-id @@ -3424,3 +3424,12 @@ def test_method_on_bytes(self): expected = Series(np.array( ['ad', 'be', 'cf'], 'S2').astype(object)) tm.assert_series_equal(result, expected) + + @pytest.mark.skipif(compat.PY2, reason='not in python2') + def test_casefold(self): + # GH25405 + expected = Series(['ss', NA, 'case', 'ssd']) + s = Series(['ß', NA, 'case', 'ßd']) + result = s.str.casefold() + + tm.assert_series_equal(result, expected)