Skip to content

Commit aad9ba3

Browse files
phofl authored and meeseeksmachine committed
Backport PR pandas-dev#56152: BUG: translate losing object dtype with new string dtype
1 parent 2e48c71 commit aad9ba3

File tree

3 files changed

+18
-12
lines changed

3 files changed

+18
-12
lines changed

doc/source/whatsnew/v2.1.4.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ Bug fixes
2525
- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
2626
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
2727
- Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
28-
-
28+
- Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`)
2929

3030
.. ---------------------------------------------------------------------------
3131
.. _whatsnew_214.other:

pandas/core/strings/accessor.py

+12-10
Original file line number | Diff line number | Diff line change
@@ -259,6 +259,7 @@ def _wrap_result(
259259
fill_value=np.nan,
260260
returns_string: bool = True,
261261
returns_bool: bool = False,
262+
dtype=None,
262263
):
263264
from pandas import (
264265
Index,
@@ -379,29 +380,29 @@ def cons_row(x):
379380
out = out.get_level_values(0)
380381
return out
381382
else:
382-
return Index(result, name=name)
383+
return Index(result, name=name, dtype=dtype)
383384
else:
384385
index = self._orig.index
385386
# This is a mess.
386-
dtype: DtypeObj | str | None
387+
_dtype: DtypeObj | str | None = dtype
387388
vdtype = getattr(result, "dtype", None)
388389
if self._is_string:
389390
if is_bool_dtype(vdtype):
390-
dtype = result.dtype
391+
_dtype = result.dtype
391392
elif returns_string:
392-
dtype = self._orig.dtype
393+
_dtype = self._orig.dtype
393394
else:
394-
dtype = vdtype
395-
else:
396-
dtype = vdtype
395+
_dtype = vdtype
396+
elif vdtype is not None:
397+
_dtype = vdtype
397398

398399
if expand:
399400
cons = self._orig._constructor_expanddim
400-
result = cons(result, columns=name, index=index, dtype=dtype)
401+
result = cons(result, columns=name, index=index, dtype=_dtype)
401402
else:
402403
# Must be a Series
403404
cons = self._orig._constructor
404-
result = cons(result, name=name, index=index, dtype=dtype)
405+
result = cons(result, name=name, index=index, dtype=_dtype)
405406
result = result.__finalize__(self._orig, method="str")
406407
if name is not None and result.ndim == 1:
407408
# __finalize__ might copy over the original name, but we may
@@ -2317,7 +2318,8 @@ def translate(self, table):
23172318
dtype: object
23182319
"""
23192320
result = self._data.array._str_translate(table)
2320-
return self._wrap_result(result)
2321+
dtype = object if self._data.dtype == "object" else None
2322+
return self._wrap_result(result, dtype=dtype)
23212323

23222324
@forbid_nonstring_types(["bytes"])
23232325
def count(self, pat, flags: int = 0):

pandas/tests/strings/test_find_replace.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import pytest
66

77
from pandas.errors import PerformanceWarning
8+
import pandas.util._test_decorators as td
89

910
import pandas as pd
1011
from pandas import (
@@ -893,7 +894,10 @@ def test_find_nan(any_string_dtype):
893894
# --------------------------------------------------------------------------------------
894895

895896

896-
def test_translate(index_or_series, any_string_dtype):
897+
@pytest.mark.parametrize(
898+
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
899+
)
900+
def test_translate(index_or_series, any_string_dtype, infer_string):
897901
obj = index_or_series(
898902
["abcdefg", "abcc", "cdddfg", "cdefggg"], dtype=any_string_dtype
899903
)

0 commit comments

Comments
 (0)