Skip to content

Commit b12df46

Browse files
Backport PR #39188: REGR: Different results from DataFrame.apply and str accessor (#39199)
Co-authored-by: Simon Hawkins <[email protected]>
1 parent dd353a1 commit b12df46

File tree

3 files changed

+50
-42
lines changed

3 files changed

+50
-42
lines changed

doc/source/whatsnew/v1.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Fixed regressions
2424
- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
2525
- Fixed regression in :meth:`Rolling.skew` and :meth:`Rolling.kurt` modifying the object inplace (:issue:`38908`)
2626
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
27+
- Fixed regression in :meth:`DataFrame.apply` with ``axis=1`` using str accessor in apply function (:issue:`38979`)
2728
- Fixed regression in :meth:`DataFrame.replace` raising ``ValueError`` when :class:`DataFrame` has dtype ``bytes`` (:issue:`38900`)
2829
- Fixed regression in :meth:`DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`)
2930
- Fixed regression that raised ``AttributeError`` with PyArrow versions [0.16.0, 1.0.0) (:issue:`38801`)

pandas/core/strings/accessor.py

+41-42
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def wrapper(self, *args, **kwargs):
109109
def _map_and_wrap(name, docstring):
110110
@forbid_nonstring_types(["bytes"], name=name)
111111
def wrapper(self):
112-
result = getattr(self._array, f"_str_{name}")()
112+
result = getattr(self._data.array, f"_str_{name}")()
113113
return self._wrap_result(result)
114114

115115
wrapper.__doc__ = docstring
@@ -154,8 +154,7 @@ def __init__(self, data):
154154
self._inferred_dtype = self._validate(data)
155155
self._is_categorical = is_categorical_dtype(data.dtype)
156156
self._is_string = isinstance(data.dtype, StringDtype)
157-
array = data.array
158-
self._array = array
157+
self._data = data
159158

160159
self._index = self._name = None
161160
if isinstance(data, ABCSeries):
@@ -219,7 +218,7 @@ def _validate(data):
219218
return inferred_dtype
220219

221220
def __getitem__(self, key):
222-
result = self._array._str_getitem(key)
221+
result = self._data.array._str_getitem(key)
223222
return self._wrap_result(result)
224223

225224
def __iter__(self):
@@ -744,13 +743,13 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
744743
@Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})
745744
@forbid_nonstring_types(["bytes"])
746745
def split(self, pat=None, n=-1, expand=False):
747-
result = self._array._str_split(pat, n, expand)
746+
result = self._data.array._str_split(pat, n, expand)
748747
return self._wrap_result(result, returns_string=expand, expand=expand)
749748

750749
@Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"})
751750
@forbid_nonstring_types(["bytes"])
752751
def rsplit(self, pat=None, n=-1, expand=False):
753-
result = self._array._str_rsplit(pat, n=n)
752+
result = self._data.array._str_rsplit(pat, n=n)
754753
return self._wrap_result(result, expand=expand, returns_string=expand)
755754

756755
_shared_docs[
@@ -846,7 +845,7 @@ def rsplit(self, pat=None, n=-1, expand=False):
846845
)
847846
@forbid_nonstring_types(["bytes"])
848847
def partition(self, sep=" ", expand=True):
849-
result = self._array._str_partition(sep, expand)
848+
result = self._data.array._str_partition(sep, expand)
850849
return self._wrap_result(result, expand=expand, returns_string=expand)
851850

852851
@Appender(
@@ -860,7 +859,7 @@ def partition(self, sep=" ", expand=True):
860859
)
861860
@forbid_nonstring_types(["bytes"])
862861
def rpartition(self, sep=" ", expand=True):
863-
result = self._array._str_rpartition(sep, expand)
862+
result = self._data.array._str_rpartition(sep, expand)
864863
return self._wrap_result(result, expand=expand, returns_string=expand)
865864

866865
def get(self, i):
@@ -914,7 +913,7 @@ def get(self, i):
914913
5 None
915914
dtype: object
916915
"""
917-
result = self._array._str_get(i)
916+
result = self._data.array._str_get(i)
918917
return self._wrap_result(result)
919918

920919
@forbid_nonstring_types(["bytes"])
@@ -980,7 +979,7 @@ def join(self, sep):
980979
4 NaN
981980
dtype: object
982981
"""
983-
result = self._array._str_join(sep)
982+
result = self._data.array._str_join(sep)
984983
return self._wrap_result(result)
985984

986985
@forbid_nonstring_types(["bytes"])
@@ -1108,7 +1107,7 @@ def contains(self, pat, case=True, flags=0, na=None, regex=True):
11081107
4 False
11091108
dtype: bool
11101109
"""
1111-
result = self._array._str_contains(pat, case, flags, na, regex)
1110+
result = self._data.array._str_contains(pat, case, flags, na, regex)
11121111
return self._wrap_result(result, fill_value=na, returns_string=False)
11131112

11141113
@forbid_nonstring_types(["bytes"])
@@ -1140,7 +1139,7 @@ def match(self, pat, case=True, flags=0, na=None):
11401139
re.match.
11411140
extract : Extract matched groups.
11421141
"""
1143-
result = self._array._str_match(pat, case=case, flags=flags, na=na)
1142+
result = self._data.array._str_match(pat, case=case, flags=flags, na=na)
11441143
return self._wrap_result(result, fill_value=na, returns_string=False)
11451144

11461145
@forbid_nonstring_types(["bytes"])
@@ -1173,7 +1172,7 @@ def fullmatch(self, pat, case=True, flags=0, na=None):
11731172
matches the regular expression.
11741173
extract : Extract matched groups.
11751174
"""
1176-
result = self._array._str_fullmatch(pat, case=case, flags=flags, na=na)
1175+
result = self._data.array._str_fullmatch(pat, case=case, flags=flags, na=na)
11771176
return self._wrap_result(result, fill_value=na, returns_string=False)
11781177

11791178
@forbid_nonstring_types(["bytes"])
@@ -1309,7 +1308,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None):
13091308
)
13101309
warnings.warn(msg, FutureWarning, stacklevel=3)
13111310
regex = True
1312-
result = self._array._str_replace(
1311+
result = self._data.array._str_replace(
13131312
pat, repl, n=n, case=case, flags=flags, regex=regex
13141313
)
13151314
return self._wrap_result(result)
@@ -1355,7 +1354,7 @@ def repeat(self, repeats):
13551354
2 ccc
13561355
dtype: object
13571356
"""
1358-
result = self._array._str_repeat(repeats)
1357+
result = self._data.array._str_repeat(repeats)
13591358
return self._wrap_result(result)
13601359

13611360
@forbid_nonstring_types(["bytes"])
@@ -1423,7 +1422,7 @@ def pad(self, width, side="left", fillchar=" "):
14231422
msg = f"width must be of integer type, not {type(width).__name__}"
14241423
raise TypeError(msg)
14251424

1426-
result = self._array._str_pad(width, side=side, fillchar=fillchar)
1425+
result = self._data.array._str_pad(width, side=side, fillchar=fillchar)
14271426
return self._wrap_result(result)
14281427

14291428
_shared_docs[
@@ -1597,7 +1596,7 @@ def slice(self, start=None, stop=None, step=None):
15971596
2 cm
15981597
dtype: object
15991598
"""
1600-
result = self._array._str_slice(start, stop, step)
1599+
result = self._data.array._str_slice(start, stop, step)
16011600
return self._wrap_result(result)
16021601

16031602
@forbid_nonstring_types(["bytes"])
@@ -1673,7 +1672,7 @@ def slice_replace(self, start=None, stop=None, repl=None):
16731672
4 aXde
16741673
dtype: object
16751674
"""
1676-
result = self._array._str_slice_replace(start, stop, repl)
1675+
result = self._data.array._str_slice_replace(start, stop, repl)
16771676
return self._wrap_result(result)
16781677

16791678
def decode(self, encoding, errors="strict"):
@@ -1699,7 +1698,7 @@ def decode(self, encoding, errors="strict"):
16991698
else:
17001699
decoder = codecs.getdecoder(encoding)
17011700
f = lambda x: decoder(x, errors)[0]
1702-
arr = self._array
1701+
arr = self._data.array
17031702
# assert isinstance(arr, (StringArray,))
17041703
result = arr._str_map(f)
17051704
return self._wrap_result(result)
@@ -1720,7 +1719,7 @@ def encode(self, encoding, errors="strict"):
17201719
-------
17211720
encoded : Series/Index of objects
17221721
"""
1723-
result = self._array._str_encode(encoding, errors)
1722+
result = self._data.array._str_encode(encoding, errors)
17241723
return self._wrap_result(result, returns_string=False)
17251724

17261725
_shared_docs[
@@ -1798,7 +1797,7 @@ def encode(self, encoding, errors="strict"):
17981797
)
17991798
@forbid_nonstring_types(["bytes"])
18001799
def strip(self, to_strip=None):
1801-
result = self._array._str_strip(to_strip)
1800+
result = self._data.array._str_strip(to_strip)
18021801
return self._wrap_result(result)
18031802

18041803
@Appender(
@@ -1807,7 +1806,7 @@ def strip(self, to_strip=None):
18071806
)
18081807
@forbid_nonstring_types(["bytes"])
18091808
def lstrip(self, to_strip=None):
1810-
result = self._array._str_lstrip(to_strip)
1809+
result = self._data.array._str_lstrip(to_strip)
18111810
return self._wrap_result(result)
18121811

18131812
@Appender(
@@ -1816,7 +1815,7 @@ def lstrip(self, to_strip=None):
18161815
)
18171816
@forbid_nonstring_types(["bytes"])
18181817
def rstrip(self, to_strip=None):
1819-
result = self._array._str_rstrip(to_strip)
1818+
result = self._data.array._str_rstrip(to_strip)
18201819
return self._wrap_result(result)
18211820

18221821
@forbid_nonstring_types(["bytes"])
@@ -1875,7 +1874,7 @@ def wrap(self, width, **kwargs):
18751874
1 another line\nto be\nwrapped
18761875
dtype: object
18771876
"""
1878-
result = self._array._str_wrap(width, **kwargs)
1877+
result = self._data.array._str_wrap(width, **kwargs)
18791878
return self._wrap_result(result)
18801879

18811880
@forbid_nonstring_types(["bytes"])
@@ -1917,7 +1916,7 @@ def get_dummies(self, sep="|"):
19171916
"""
19181917
# we need to cast to Series of strings as only that has all
19191918
# methods available for making the dummies...
1920-
result, name = self._array._str_get_dummies(sep)
1919+
result, name = self._data.array._str_get_dummies(sep)
19211920
return self._wrap_result(
19221921
result,
19231922
name=name,
@@ -1944,7 +1943,7 @@ def translate(self, table):
19441943
-------
19451944
Series or Index
19461945
"""
1947-
result = self._array._str_translate(table)
1946+
result = self._data.array._str_translate(table)
19481947
return self._wrap_result(result)
19491948

19501949
@forbid_nonstring_types(["bytes"])
@@ -2012,7 +2011,7 @@ def count(self, pat, flags=0):
20122011
>>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a')
20132012
Int64Index([0, 0, 2, 1], dtype='int64')
20142013
"""
2015-
result = self._array._str_count(pat, flags)
2014+
result = self._data.array._str_count(pat, flags)
20162015
return self._wrap_result(result, returns_string=False)
20172016

20182017
@forbid_nonstring_types(["bytes"])
@@ -2069,7 +2068,7 @@ def startswith(self, pat, na=None):
20692068
3 False
20702069
dtype: bool
20712070
"""
2072-
result = self._array._str_startswith(pat, na=na)
2071+
result = self._data.array._str_startswith(pat, na=na)
20732072
return self._wrap_result(result, returns_string=False)
20742073

20752074
@forbid_nonstring_types(["bytes"])
@@ -2126,7 +2125,7 @@ def endswith(self, pat, na=None):
21262125
3 False
21272126
dtype: bool
21282127
"""
2129-
result = self._array._str_endswith(pat, na=na)
2128+
result = self._data.array._str_endswith(pat, na=na)
21302129
return self._wrap_result(result, returns_string=False)
21312130

21322131
@forbid_nonstring_types(["bytes"])
@@ -2219,7 +2218,7 @@ def findall(self, pat, flags=0):
22192218
2 [b, b]
22202219
dtype: object
22212220
"""
2222-
result = self._array._str_findall(pat, flags)
2221+
result = self._data.array._str_findall(pat, flags)
22232222
return self._wrap_result(result, returns_string=False)
22242223

22252224
@forbid_nonstring_types(["bytes"])
@@ -2426,7 +2425,7 @@ def find(self, sub, start=0, end=None):
24262425
msg = f"expected a string object, not {type(sub).__name__}"
24272426
raise TypeError(msg)
24282427

2429-
result = self._array._str_find(sub, start, end)
2428+
result = self._data.array._str_find(sub, start, end)
24302429
return self._wrap_result(result, returns_string=False)
24312430

24322431
@Appender(
@@ -2443,7 +2442,7 @@ def rfind(self, sub, start=0, end=None):
24432442
msg = f"expected a string object, not {type(sub).__name__}"
24442443
raise TypeError(msg)
24452444

2446-
result = self._array._str_rfind(sub, start=start, end=end)
2445+
result = self._data.array._str_rfind(sub, start=start, end=end)
24472446
return self._wrap_result(result, returns_string=False)
24482447

24492448
@forbid_nonstring_types(["bytes"])
@@ -2463,7 +2462,7 @@ def normalize(self, form):
24632462
-------
24642463
normalized : Series/Index of objects
24652464
"""
2466-
result = self._array._str_normalize(form)
2465+
result = self._data.array._str_normalize(form)
24672466
return self._wrap_result(result)
24682467

24692468
_shared_docs[
@@ -2510,7 +2509,7 @@ def index(self, sub, start=0, end=None):
25102509
msg = f"expected a string object, not {type(sub).__name__}"
25112510
raise TypeError(msg)
25122511

2513-
result = self._array._str_index(sub, start=start, end=end)
2512+
result = self._data.array._str_index(sub, start=start, end=end)
25142513
return self._wrap_result(result, returns_string=False)
25152514

25162515
@Appender(
@@ -2528,7 +2527,7 @@ def rindex(self, sub, start=0, end=None):
25282527
msg = f"expected a string object, not {type(sub).__name__}"
25292528
raise TypeError(msg)
25302529

2531-
result = self._array._str_rindex(sub, start=start, end=end)
2530+
result = self._data.array._str_rindex(sub, start=start, end=end)
25322531
return self._wrap_result(result, returns_string=False)
25332532

25342533
def len(self):
@@ -2577,7 +2576,7 @@ def len(self):
25772576
5 3.0
25782577
dtype: float64
25792578
"""
2580-
result = self._array._str_len()
2579+
result = self._data.array._str_len()
25812580
return self._wrap_result(result, returns_string=False)
25822581

25832582
_shared_docs[
@@ -2677,37 +2676,37 @@ def len(self):
26772676
@Appender(_shared_docs["casemethods"] % _doc_args["lower"])
26782677
@forbid_nonstring_types(["bytes"])
26792678
def lower(self):
2680-
result = self._array._str_lower()
2679+
result = self._data.array._str_lower()
26812680
return self._wrap_result(result)
26822681

26832682
@Appender(_shared_docs["casemethods"] % _doc_args["upper"])
26842683
@forbid_nonstring_types(["bytes"])
26852684
def upper(self):
2686-
result = self._array._str_upper()
2685+
result = self._data.array._str_upper()
26872686
return self._wrap_result(result)
26882687

26892688
@Appender(_shared_docs["casemethods"] % _doc_args["title"])
26902689
@forbid_nonstring_types(["bytes"])
26912690
def title(self):
2692-
result = self._array._str_title()
2691+
result = self._data.array._str_title()
26932692
return self._wrap_result(result)
26942693

26952694
@Appender(_shared_docs["casemethods"] % _doc_args["capitalize"])
26962695
@forbid_nonstring_types(["bytes"])
26972696
def capitalize(self):
2698-
result = self._array._str_capitalize()
2697+
result = self._data.array._str_capitalize()
26992698
return self._wrap_result(result)
27002699

27012700
@Appender(_shared_docs["casemethods"] % _doc_args["swapcase"])
27022701
@forbid_nonstring_types(["bytes"])
27032702
def swapcase(self):
2704-
result = self._array._str_swapcase()
2703+
result = self._data.array._str_swapcase()
27052704
return self._wrap_result(result)
27062705

27072706
@Appender(_shared_docs["casemethods"] % _doc_args["casefold"])
27082707
@forbid_nonstring_types(["bytes"])
27092708
def casefold(self):
2710-
result = self._array._str_casefold()
2709+
result = self._data.array._str_casefold()
27112710
return self._wrap_result(result)
27122711

27132712
_shared_docs[

pandas/tests/test_strings.py

+8
Original file line numberDiff line numberDiff line change
@@ -3670,3 +3670,11 @@ def test_str_get_stringarray_multiple_nans():
36703670
result = s.str.get(2)
36713671
expected = Series(pd.array([pd.NA, pd.NA, pd.NA, "c"]))
36723672
tm.assert_series_equal(result, expected)
3673+
3674+
3675+
def test_str_accessor_in_apply_func():
3676+
# https://github.com/pandas-dev/pandas/issues/38979
3677+
df = DataFrame(zip("abc", "def"))
3678+
expected = Series(["A/D", "B/E", "C/F"])
3679+
result = df.apply(lambda f: "/".join(f.str.upper()), axis=1)
3680+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)