diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 55fddb8b732e2..982eb21f5a5da 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -25,6 +25,7 @@ Fixed regressions - Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`) - Fixed regression in :meth:`Rolling.skew` and :meth:`Rolling.kurt` modifying the object inplace (:issue:`38908`) - Fixed regression in :meth:`read_csv` and other read functions were the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`) +- Fixed regression in :meth:`DataFrame.apply` with ``axis=1`` using str accessor in apply function (:issue:`38979`) - Fixed regression in :meth:`DataFrame.replace` raising ``ValueError`` when :class:`DataFrame` has dtype ``bytes`` (:issue:`38900`) - Fixed regression in :meth:`DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`) - Fixed regression that raised ``AttributeError`` with PyArrow versions [0.16.0, 1.0.0) (:issue:`38801`) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 2713b76189157..ca12012ec135f 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -109,7 +109,7 @@ def wrapper(self, *args, **kwargs): def _map_and_wrap(name, docstring): @forbid_nonstring_types(["bytes"], name=name) def wrapper(self): - result = getattr(self._array, f"_str_{name}")() + result = getattr(self._data.array, f"_str_{name}")() return self._wrap_result(result) wrapper.__doc__ = docstring @@ -154,8 +154,7 @@ def __init__(self, data): self._inferred_dtype = self._validate(data) self._is_categorical = is_categorical_dtype(data.dtype) self._is_string = isinstance(data.dtype, StringDtype) - array = data.array - self._array = array + self._data = data self._index = self._name = None if isinstance(data, ABCSeries): @@ -219,7 +218,7 @@ def _validate(data): return inferred_dtype def __getitem__(self, key): - result = self._array._str_getitem(key) + result = self._data.array._str_getitem(key) return self._wrap_result(result) def __iter__(self): @@ -744,13 +743,13 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"): @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"}) @forbid_nonstring_types(["bytes"]) def split(self, pat=None, n=-1, expand=False): - result = self._array._str_split(pat, n, expand) + result = self._data.array._str_split(pat, n, expand) return self._wrap_result(result, returns_string=expand, expand=expand) @Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"}) @forbid_nonstring_types(["bytes"]) def rsplit(self, pat=None, n=-1, expand=False): - result = self._array._str_rsplit(pat, n=n) + result = self._data.array._str_rsplit(pat, n=n) return self._wrap_result(result, expand=expand, returns_string=expand) _shared_docs[ @@ -846,7 +845,7 @@ def rsplit(self, pat=None, n=-1, expand=False): ) @forbid_nonstring_types(["bytes"]) def partition(self, sep=" ", expand=True): - result = self._array._str_partition(sep, expand) + result = self._data.array._str_partition(sep, expand) return self._wrap_result(result, expand=expand, returns_string=expand) @Appender( @@ -860,7 +859,7 @@ def partition(self, sep=" ", expand=True): ) @forbid_nonstring_types(["bytes"]) def rpartition(self, sep=" ", expand=True): - result = self._array._str_rpartition(sep, expand) + result = self._data.array._str_rpartition(sep, expand) return self._wrap_result(result, expand=expand, returns_string=expand) def get(self, i): @@ -914,7 +913,7 @@ def get(self, i): 5 None dtype: object """ - result = self._array._str_get(i) + result = self._data.array._str_get(i) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -980,7 +979,7 @@ def join(self, sep): 4 NaN dtype: object """ - result = self._array._str_join(sep) + result = self._data.array._str_join(sep) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1108,7 +1107,7 @@ def contains(self, pat, case=True, flags=0, na=None, regex=True): 4 False dtype: bool """ - result = self._array._str_contains(pat, case, flags, na, regex) + result = self._data.array._str_contains(pat, case, flags, na, regex) return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -1140,7 +1139,7 @@ def match(self, pat, case=True, flags=0, na=None): re.match. extract : Extract matched groups. """ - result = self._array._str_match(pat, case=case, flags=flags, na=na) + result = self._data.array._str_match(pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -1173,7 +1172,7 @@ def fullmatch(self, pat, case=True, flags=0, na=None): matches the regular expression. extract : Extract matched groups. """ - result = self._array._str_fullmatch(pat, case=case, flags=flags, na=na) + result = self._data.array._str_fullmatch(pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -1309,7 +1308,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None): ) warnings.warn(msg, FutureWarning, stacklevel=3) regex = True - result = self._array._str_replace( + result = self._data.array._str_replace( pat, repl, n=n, case=case, flags=flags, regex=regex ) return self._wrap_result(result) @@ -1355,7 +1354,7 @@ def repeat(self, repeats): 2 ccc dtype: object """ - result = self._array._str_repeat(repeats) + result = self._data.array._str_repeat(repeats) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1423,7 +1422,7 @@ def pad(self, width, side="left", fillchar=" "): msg = f"width must be of integer type, not {type(width).__name__}" raise TypeError(msg) - result = self._array._str_pad(width, side=side, fillchar=fillchar) + result = self._data.array._str_pad(width, side=side, fillchar=fillchar) return self._wrap_result(result) _shared_docs[ @@ -1597,7 +1596,7 @@ def slice(self, start=None, stop=None, step=None): 2 cm dtype: object """ - result = self._array._str_slice(start, stop, step) + result = self._data.array._str_slice(start, stop, step) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1673,7 +1672,7 @@ def slice_replace(self, start=None, stop=None, repl=None): 4 aXde dtype: object """ - result = self._array._str_slice_replace(start, stop, repl) + result = self._data.array._str_slice_replace(start, stop, repl) return self._wrap_result(result) def decode(self, encoding, errors="strict"): @@ -1699,7 +1698,7 @@ def decode(self, encoding, errors="strict"): else: decoder = codecs.getdecoder(encoding) f = lambda x: decoder(x, errors)[0] - arr = self._array + arr = self._data.array # assert isinstance(arr, (StringArray,)) result = arr._str_map(f) return self._wrap_result(result) @@ -1720,7 +1719,7 @@ def encode(self, encoding, errors="strict"): ------- encoded : Series/Index of objects """ - result = self._array._str_encode(encoding, errors) + result = self._data.array._str_encode(encoding, errors) return self._wrap_result(result, returns_string=False) _shared_docs[ @@ -1798,7 +1797,7 @@ def encode(self, encoding, errors="strict"): ) @forbid_nonstring_types(["bytes"]) def strip(self, to_strip=None): - result = self._array._str_strip(to_strip) + result = self._data.array._str_strip(to_strip) return self._wrap_result(result) @Appender( @@ -1807,7 +1806,7 @@ def strip(self, to_strip=None): ) @forbid_nonstring_types(["bytes"]) def lstrip(self, to_strip=None): - result = self._array._str_lstrip(to_strip) + result = self._data.array._str_lstrip(to_strip) return self._wrap_result(result) @Appender( @@ -1816,7 +1815,7 @@ def lstrip(self, to_strip=None): ) @forbid_nonstring_types(["bytes"]) def rstrip(self, to_strip=None): - result = self._array._str_rstrip(to_strip) + result = self._data.array._str_rstrip(to_strip) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1875,7 +1874,7 @@ def wrap(self, width, **kwargs): 1 another line\nto be\nwrapped dtype: object """ - result = self._array._str_wrap(width, **kwargs) + result = self._data.array._str_wrap(width, **kwargs) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1917,7 +1916,7 @@ def get_dummies(self, sep="|"): """ # we need to cast to Series of strings as only that has all # methods available for making the dummies... - result, name = self._array._str_get_dummies(sep) + result, name = self._data.array._str_get_dummies(sep) return self._wrap_result( result, name=name, @@ -1944,7 +1943,7 @@ def translate(self, table): ------- Series or Index """ - result = self._array._str_translate(table) + result = self._data.array._str_translate(table) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -2012,7 +2011,7 @@ def count(self, pat, flags=0): >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') Int64Index([0, 0, 2, 1], dtype='int64') """ - result = self._array._str_count(pat, flags) + result = self._data.array._str_count(pat, flags) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2069,7 +2068,7 @@ def startswith(self, pat, na=None): 3 False dtype: bool """ - result = self._array._str_startswith(pat, na=na) + result = self._data.array._str_startswith(pat, na=na) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2126,7 +2125,7 @@ def endswith(self, pat, na=None): 3 False dtype: bool """ - result = self._array._str_endswith(pat, na=na) + result = self._data.array._str_endswith(pat, na=na) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2219,7 +2218,7 @@ def findall(self, pat, flags=0): 2 [b, b] dtype: object """ - result = self._array._str_findall(pat, flags) + result = self._data.array._str_findall(pat, flags) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2426,7 +2425,7 @@ def find(self, sub, start=0, end=None): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) - result = self._array._str_find(sub, start, end) + result = self._data.array._str_find(sub, start, end) return self._wrap_result(result, returns_string=False) @Appender( @@ -2443,7 +2442,7 @@ def rfind(self, sub, start=0, end=None): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) - result = self._array._str_rfind(sub, start=start, end=end) + result = self._data.array._str_rfind(sub, start=start, end=end) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2463,7 +2462,7 @@ def normalize(self, form): ------- normalized : Series/Index of objects """ - result = self._array._str_normalize(form) + result = self._data.array._str_normalize(form) return self._wrap_result(result) _shared_docs[ @@ -2510,7 +2509,7 @@ def index(self, sub, start=0, end=None): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) - result = self._array._str_index(sub, start=start, end=end) + result = self._data.array._str_index(sub, start=start, end=end) return self._wrap_result(result, returns_string=False) @Appender( @@ -2528,7 +2527,7 @@ def rindex(self, sub, start=0, end=None): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) - result = self._array._str_rindex(sub, start=start, end=end) + result = self._data.array._str_rindex(sub, start=start, end=end) return self._wrap_result(result, returns_string=False) def len(self): @@ -2577,7 +2576,7 @@ def len(self): 5 3.0 dtype: float64 """ - result = self._array._str_len() + result = self._data.array._str_len() return self._wrap_result(result, returns_string=False) _shared_docs[ @@ -2677,37 +2676,37 @@ def len(self): @Appender(_shared_docs["casemethods"] % _doc_args["lower"]) @forbid_nonstring_types(["bytes"]) def lower(self): - result = self._array._str_lower() + result = self._data.array._str_lower() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["upper"]) @forbid_nonstring_types(["bytes"]) def upper(self): - result = self._array._str_upper() + result = self._data.array._str_upper() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["title"]) @forbid_nonstring_types(["bytes"]) def title(self): - result = self._array._str_title() + result = self._data.array._str_title() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["capitalize"]) @forbid_nonstring_types(["bytes"]) def capitalize(self): - result = self._array._str_capitalize() + result = self._data.array._str_capitalize() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["swapcase"]) @forbid_nonstring_types(["bytes"]) def swapcase(self): - result = self._array._str_swapcase() + result = self._data.array._str_swapcase() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["casefold"]) @forbid_nonstring_types(["bytes"]) def casefold(self): - result = self._array._str_casefold() + result = self._data.array._str_casefold() return self._wrap_result(result) _shared_docs[ diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 538a52d84b73a..a15b2d03079d4 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3670,3 +3670,11 @@ def test_str_get_stringarray_multiple_nans(): result = s.str.get(2) expected = Series(pd.array([pd.NA, pd.NA, pd.NA, "c"])) tm.assert_series_equal(result, expected) + + +def test_str_accessor_in_apply_func(): + # https://github.com/pandas-dev/pandas/issues/38979 + df = DataFrame(zip("abc", "def")) + expected = Series(["A/D", "B/E", "C/F"]) + result = df.apply(lambda f: "/".join(f.str.upper()), axis=1) + tm.assert_series_equal(result, expected)