Skip to content

Commit 2c80640

Browse files
REGR: Different results from DataFrame.apply and str accessor (#39188)
1 parent 9d13997 commit 2c80640

File tree

3 files changed

+50
-42
lines changed

3 files changed

+50
-42
lines changed

doc/source/whatsnew/v1.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Fixed regressions
2424
- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
2525
- Fixed regression in :meth:`Rolling.skew` and :meth:`Rolling.kurt` modifying the object inplace (:issue:`38908`)
2626
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
27+
- Fixed regression in :meth:`DataFrame.apply` with ``axis=1`` when using the ``str`` accessor in the applied function (:issue:`38979`)
2728
- Fixed regression in :meth:`DataFrame.replace` raising ``ValueError`` when :class:`DataFrame` has dtype ``bytes`` (:issue:`38900`)
2829
- Fixed regression in :meth:`DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`)
2930
- Fixed regression in :meth:`Series.fillna` that raised ``RecursionError`` with ``datetime64[ns, UTC]`` dtype (:issue:`38851`)

pandas/core/strings/accessor.py

+41-42
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def wrapper(self, *args, **kwargs):
104104
def _map_and_wrap(name, docstring):
105105
@forbid_nonstring_types(["bytes"], name=name)
106106
def wrapper(self):
107-
result = getattr(self._array, f"_str_{name}")()
107+
result = getattr(self._data.array, f"_str_{name}")()
108108
return self._wrap_result(result)
109109

110110
wrapper.__doc__ = docstring
@@ -149,8 +149,7 @@ def __init__(self, data):
149149
self._inferred_dtype = self._validate(data)
150150
self._is_categorical = is_categorical_dtype(data.dtype)
151151
self._is_string = isinstance(data.dtype, StringDtype)
152-
array = data.array
153-
self._array = array
152+
self._data = data
154153

155154
self._index = self._name = None
156155
if isinstance(data, ABCSeries):
@@ -214,7 +213,7 @@ def _validate(data):
214213
return inferred_dtype
215214

216215
def __getitem__(self, key):
217-
result = self._array._str_getitem(key)
216+
result = self._data.array._str_getitem(key)
218217
return self._wrap_result(result)
219218

220219
def __iter__(self):
@@ -739,13 +738,13 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
739738
@Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})
740739
@forbid_nonstring_types(["bytes"])
741740
def split(self, pat=None, n=-1, expand=False):
742-
result = self._array._str_split(pat, n, expand)
741+
result = self._data.array._str_split(pat, n, expand)
743742
return self._wrap_result(result, returns_string=expand, expand=expand)
744743

745744
@Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"})
746745
@forbid_nonstring_types(["bytes"])
747746
def rsplit(self, pat=None, n=-1, expand=False):
748-
result = self._array._str_rsplit(pat, n=n)
747+
result = self._data.array._str_rsplit(pat, n=n)
749748
return self._wrap_result(result, expand=expand, returns_string=expand)
750749

751750
_shared_docs[
@@ -841,7 +840,7 @@ def rsplit(self, pat=None, n=-1, expand=False):
841840
)
842841
@forbid_nonstring_types(["bytes"])
843842
def partition(self, sep=" ", expand=True):
844-
result = self._array._str_partition(sep, expand)
843+
result = self._data.array._str_partition(sep, expand)
845844
return self._wrap_result(result, expand=expand, returns_string=expand)
846845

847846
@Appender(
@@ -855,7 +854,7 @@ def partition(self, sep=" ", expand=True):
855854
)
856855
@forbid_nonstring_types(["bytes"])
857856
def rpartition(self, sep=" ", expand=True):
858-
result = self._array._str_rpartition(sep, expand)
857+
result = self._data.array._str_rpartition(sep, expand)
859858
return self._wrap_result(result, expand=expand, returns_string=expand)
860859

861860
def get(self, i):
@@ -909,7 +908,7 @@ def get(self, i):
909908
5 None
910909
dtype: object
911910
"""
912-
result = self._array._str_get(i)
911+
result = self._data.array._str_get(i)
913912
return self._wrap_result(result)
914913

915914
@forbid_nonstring_types(["bytes"])
@@ -975,7 +974,7 @@ def join(self, sep):
975974
4 NaN
976975
dtype: object
977976
"""
978-
result = self._array._str_join(sep)
977+
result = self._data.array._str_join(sep)
979978
return self._wrap_result(result)
980979

981980
@forbid_nonstring_types(["bytes"])
@@ -1103,7 +1102,7 @@ def contains(self, pat, case=True, flags=0, na=None, regex=True):
11031102
4 False
11041103
dtype: bool
11051104
"""
1106-
result = self._array._str_contains(pat, case, flags, na, regex)
1105+
result = self._data.array._str_contains(pat, case, flags, na, regex)
11071106
return self._wrap_result(result, fill_value=na, returns_string=False)
11081107

11091108
@forbid_nonstring_types(["bytes"])
@@ -1135,7 +1134,7 @@ def match(self, pat, case=True, flags=0, na=None):
11351134
re.match.
11361135
extract : Extract matched groups.
11371136
"""
1138-
result = self._array._str_match(pat, case=case, flags=flags, na=na)
1137+
result = self._data.array._str_match(pat, case=case, flags=flags, na=na)
11391138
return self._wrap_result(result, fill_value=na, returns_string=False)
11401139

11411140
@forbid_nonstring_types(["bytes"])
@@ -1168,7 +1167,7 @@ def fullmatch(self, pat, case=True, flags=0, na=None):
11681167
matches the regular expression.
11691168
extract : Extract matched groups.
11701169
"""
1171-
result = self._array._str_fullmatch(pat, case=case, flags=flags, na=na)
1170+
result = self._data.array._str_fullmatch(pat, case=case, flags=flags, na=na)
11721171
return self._wrap_result(result, fill_value=na, returns_string=False)
11731172

11741173
@forbid_nonstring_types(["bytes"])
@@ -1304,7 +1303,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None):
13041303
)
13051304
warnings.warn(msg, FutureWarning, stacklevel=3)
13061305
regex = True
1307-
result = self._array._str_replace(
1306+
result = self._data.array._str_replace(
13081307
pat, repl, n=n, case=case, flags=flags, regex=regex
13091308
)
13101309
return self._wrap_result(result)
@@ -1350,7 +1349,7 @@ def repeat(self, repeats):
13501349
2 ccc
13511350
dtype: object
13521351
"""
1353-
result = self._array._str_repeat(repeats)
1352+
result = self._data.array._str_repeat(repeats)
13541353
return self._wrap_result(result)
13551354

13561355
@forbid_nonstring_types(["bytes"])
@@ -1418,7 +1417,7 @@ def pad(self, width, side="left", fillchar=" "):
14181417
msg = f"width must be of integer type, not {type(width).__name__}"
14191418
raise TypeError(msg)
14201419

1421-
result = self._array._str_pad(width, side=side, fillchar=fillchar)
1420+
result = self._data.array._str_pad(width, side=side, fillchar=fillchar)
14221421
return self._wrap_result(result)
14231422

14241423
_shared_docs[
@@ -1592,7 +1591,7 @@ def slice(self, start=None, stop=None, step=None):
15921591
2 cm
15931592
dtype: object
15941593
"""
1595-
result = self._array._str_slice(start, stop, step)
1594+
result = self._data.array._str_slice(start, stop, step)
15961595
return self._wrap_result(result)
15971596

15981597
@forbid_nonstring_types(["bytes"])
@@ -1668,7 +1667,7 @@ def slice_replace(self, start=None, stop=None, repl=None):
16681667
4 aXde
16691668
dtype: object
16701669
"""
1671-
result = self._array._str_slice_replace(start, stop, repl)
1670+
result = self._data.array._str_slice_replace(start, stop, repl)
16721671
return self._wrap_result(result)
16731672

16741673
def decode(self, encoding, errors="strict"):
@@ -1694,7 +1693,7 @@ def decode(self, encoding, errors="strict"):
16941693
else:
16951694
decoder = codecs.getdecoder(encoding)
16961695
f = lambda x: decoder(x, errors)[0]
1697-
arr = self._array
1696+
arr = self._data.array
16981697
# assert isinstance(arr, (StringArray,))
16991698
result = arr._str_map(f)
17001699
return self._wrap_result(result)
@@ -1715,7 +1714,7 @@ def encode(self, encoding, errors="strict"):
17151714
-------
17161715
encoded : Series/Index of objects
17171716
"""
1718-
result = self._array._str_encode(encoding, errors)
1717+
result = self._data.array._str_encode(encoding, errors)
17191718
return self._wrap_result(result, returns_string=False)
17201719

17211720
_shared_docs[
@@ -1793,7 +1792,7 @@ def encode(self, encoding, errors="strict"):
17931792
)
17941793
@forbid_nonstring_types(["bytes"])
17951794
def strip(self, to_strip=None):
1796-
result = self._array._str_strip(to_strip)
1795+
result = self._data.array._str_strip(to_strip)
17971796
return self._wrap_result(result)
17981797

17991798
@Appender(
@@ -1802,7 +1801,7 @@ def strip(self, to_strip=None):
18021801
)
18031802
@forbid_nonstring_types(["bytes"])
18041803
def lstrip(self, to_strip=None):
1805-
result = self._array._str_lstrip(to_strip)
1804+
result = self._data.array._str_lstrip(to_strip)
18061805
return self._wrap_result(result)
18071806

18081807
@Appender(
@@ -1811,7 +1810,7 @@ def lstrip(self, to_strip=None):
18111810
)
18121811
@forbid_nonstring_types(["bytes"])
18131812
def rstrip(self, to_strip=None):
1814-
result = self._array._str_rstrip(to_strip)
1813+
result = self._data.array._str_rstrip(to_strip)
18151814
return self._wrap_result(result)
18161815

18171816
@forbid_nonstring_types(["bytes"])
@@ -1870,7 +1869,7 @@ def wrap(self, width, **kwargs):
18701869
1 another line\nto be\nwrapped
18711870
dtype: object
18721871
"""
1873-
result = self._array._str_wrap(width, **kwargs)
1872+
result = self._data.array._str_wrap(width, **kwargs)
18741873
return self._wrap_result(result)
18751874

18761875
@forbid_nonstring_types(["bytes"])
@@ -1912,7 +1911,7 @@ def get_dummies(self, sep="|"):
19121911
"""
19131912
# we need to cast to Series of strings as only that has all
19141913
# methods available for making the dummies...
1915-
result, name = self._array._str_get_dummies(sep)
1914+
result, name = self._data.array._str_get_dummies(sep)
19161915
return self._wrap_result(
19171916
result,
19181917
name=name,
@@ -1939,7 +1938,7 @@ def translate(self, table):
19391938
-------
19401939
Series or Index
19411940
"""
1942-
result = self._array._str_translate(table)
1941+
result = self._data.array._str_translate(table)
19431942
return self._wrap_result(result)
19441943

19451944
@forbid_nonstring_types(["bytes"])
@@ -2007,7 +2006,7 @@ def count(self, pat, flags=0):
20072006
>>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a')
20082007
Int64Index([0, 0, 2, 1], dtype='int64')
20092008
"""
2010-
result = self._array._str_count(pat, flags)
2009+
result = self._data.array._str_count(pat, flags)
20112010
return self._wrap_result(result, returns_string=False)
20122011

20132012
@forbid_nonstring_types(["bytes"])
@@ -2064,7 +2063,7 @@ def startswith(self, pat, na=None):
20642063
3 False
20652064
dtype: bool
20662065
"""
2067-
result = self._array._str_startswith(pat, na=na)
2066+
result = self._data.array._str_startswith(pat, na=na)
20682067
return self._wrap_result(result, returns_string=False)
20692068

20702069
@forbid_nonstring_types(["bytes"])
@@ -2121,7 +2120,7 @@ def endswith(self, pat, na=None):
21212120
3 False
21222121
dtype: bool
21232122
"""
2124-
result = self._array._str_endswith(pat, na=na)
2123+
result = self._data.array._str_endswith(pat, na=na)
21252124
return self._wrap_result(result, returns_string=False)
21262125

21272126
@forbid_nonstring_types(["bytes"])
@@ -2214,7 +2213,7 @@ def findall(self, pat, flags=0):
22142213
2 [b, b]
22152214
dtype: object
22162215
"""
2217-
result = self._array._str_findall(pat, flags)
2216+
result = self._data.array._str_findall(pat, flags)
22182217
return self._wrap_result(result, returns_string=False)
22192218

22202219
@forbid_nonstring_types(["bytes"])
@@ -2421,7 +2420,7 @@ def find(self, sub, start=0, end=None):
24212420
msg = f"expected a string object, not {type(sub).__name__}"
24222421
raise TypeError(msg)
24232422

2424-
result = self._array._str_find(sub, start, end)
2423+
result = self._data.array._str_find(sub, start, end)
24252424
return self._wrap_result(result, returns_string=False)
24262425

24272426
@Appender(
@@ -2438,7 +2437,7 @@ def rfind(self, sub, start=0, end=None):
24382437
msg = f"expected a string object, not {type(sub).__name__}"
24392438
raise TypeError(msg)
24402439

2441-
result = self._array._str_rfind(sub, start=start, end=end)
2440+
result = self._data.array._str_rfind(sub, start=start, end=end)
24422441
return self._wrap_result(result, returns_string=False)
24432442

24442443
@forbid_nonstring_types(["bytes"])
@@ -2458,7 +2457,7 @@ def normalize(self, form):
24582457
-------
24592458
normalized : Series/Index of objects
24602459
"""
2461-
result = self._array._str_normalize(form)
2460+
result = self._data.array._str_normalize(form)
24622461
return self._wrap_result(result)
24632462

24642463
_shared_docs[
@@ -2505,7 +2504,7 @@ def index(self, sub, start=0, end=None):
25052504
msg = f"expected a string object, not {type(sub).__name__}"
25062505
raise TypeError(msg)
25072506

2508-
result = self._array._str_index(sub, start=start, end=end)
2507+
result = self._data.array._str_index(sub, start=start, end=end)
25092508
return self._wrap_result(result, returns_string=False)
25102509

25112510
@Appender(
@@ -2523,7 +2522,7 @@ def rindex(self, sub, start=0, end=None):
25232522
msg = f"expected a string object, not {type(sub).__name__}"
25242523
raise TypeError(msg)
25252524

2526-
result = self._array._str_rindex(sub, start=start, end=end)
2525+
result = self._data.array._str_rindex(sub, start=start, end=end)
25272526
return self._wrap_result(result, returns_string=False)
25282527

25292528
def len(self):
@@ -2572,7 +2571,7 @@ def len(self):
25722571
5 3.0
25732572
dtype: float64
25742573
"""
2575-
result = self._array._str_len()
2574+
result = self._data.array._str_len()
25762575
return self._wrap_result(result, returns_string=False)
25772576

25782577
_shared_docs[
@@ -2672,37 +2671,37 @@ def len(self):
26722671
@Appender(_shared_docs["casemethods"] % _doc_args["lower"])
26732672
@forbid_nonstring_types(["bytes"])
26742673
def lower(self):
2675-
result = self._array._str_lower()
2674+
result = self._data.array._str_lower()
26762675
return self._wrap_result(result)
26772676

26782677
@Appender(_shared_docs["casemethods"] % _doc_args["upper"])
26792678
@forbid_nonstring_types(["bytes"])
26802679
def upper(self):
2681-
result = self._array._str_upper()
2680+
result = self._data.array._str_upper()
26822681
return self._wrap_result(result)
26832682

26842683
@Appender(_shared_docs["casemethods"] % _doc_args["title"])
26852684
@forbid_nonstring_types(["bytes"])
26862685
def title(self):
2687-
result = self._array._str_title()
2686+
result = self._data.array._str_title()
26882687
return self._wrap_result(result)
26892688

26902689
@Appender(_shared_docs["casemethods"] % _doc_args["capitalize"])
26912690
@forbid_nonstring_types(["bytes"])
26922691
def capitalize(self):
2693-
result = self._array._str_capitalize()
2692+
result = self._data.array._str_capitalize()
26942693
return self._wrap_result(result)
26952694

26962695
@Appender(_shared_docs["casemethods"] % _doc_args["swapcase"])
26972696
@forbid_nonstring_types(["bytes"])
26982697
def swapcase(self):
2699-
result = self._array._str_swapcase()
2698+
result = self._data.array._str_swapcase()
27002699
return self._wrap_result(result)
27012700

27022701
@Appender(_shared_docs["casemethods"] % _doc_args["casefold"])
27032702
@forbid_nonstring_types(["bytes"])
27042703
def casefold(self):
2705-
result = self._array._str_casefold()
2704+
result = self._data.array._str_casefold()
27062705
return self._wrap_result(result)
27072706

27082707
_shared_docs[

pandas/tests/test_strings.py

+8
Original file line numberDiff line numberDiff line change
@@ -3670,3 +3670,11 @@ def test_str_get_stringarray_multiple_nans():
36703670
result = s.str.get(2)
36713671
expected = Series(pd.array([pd.NA, pd.NA, pd.NA, "c"]))
36723672
tm.assert_series_equal(result, expected)
3673+
3674+
3675+
def test_str_accessor_in_apply_func():
3676+
# https://github.com/pandas-dev/pandas/issues/38979
3677+
df = DataFrame(zip("abc", "def"))
3678+
expected = Series(["A/D", "B/E", "C/F"])
3679+
result = df.apply(lambda f: "/".join(f.str.upper()), axis=1)
3680+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)