pandas-dev · mroeschke · Feb 15, 2023 · Nov 11, 2022 · Nov 17, 2022 · Nov 17, 2022
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -715,7 +715,10 @@ cpdef ndarray[object] ensure_string_array(
             continue
 
         if not checknull(val):
-            if not util.is_float_object(val):
+            if isinstance(val, bytes):
+                # GH#38607 see test_astype_str_from_bytes
+                result[i] = val.decode()
+            elif not util.is_float_object(val):
                 # f"{val}" is faster than str(val)
                 result[i] = f"{val}"
             else:

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1031,12 +1031,8 @@ def astype(self, dtype, copy: bool = True):
             new_values = cls._from_sequence(self, dtype=dtype, copy=copy)
 
         else:
-            if dtype == str:
-                # GH#38607 see test_astype_str_from_bytes
-                new_values = values.astype(dtype, copy=copy)
-            else:
-                # GH#13149 specifically use astype_nansafe instead of astype
-                new_values = astype_nansafe(values, dtype=dtype, copy=copy)
+            # GH#13149 specifically use astype_nansafe instead of astype
+            new_values = astype_nansafe(values, dtype=dtype, copy=copy)
 
         # pass copy=False because any copying will be done in the astype above
         if self._is_backward_compat_public_numeric_index:

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -34,6 +34,7 @@
     pa_version_under9p0,
 )
 from pandas.errors import PerformanceWarning
+import pandas.util._test_decorators as td
 
 import pandas as pd
 import pandas._testing as tm
@@ -234,6 +235,29 @@ def test_astype_str(self, data, request):
             )
         super().test_astype_str(data)
 
+    @pytest.mark.parametrize(
+        "nullable_string_dtype",
+        [
+            "string[python]",
+            pytest.param(
+                "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0")
+            ),
+        ],
+    )
+    def test_astype_string(self, data, nullable_string_dtype):
+        # binary dtypes need their own expectation; others use the base test
+        pa_dtype = data.dtype.pyarrow_dtype
+        if pa.types.is_binary(pa_dtype):
+            # bytes values are converted with val.decode() instead of str(val),
+            #  so we expect e.g. "a" rather than "b'a'"
+            result = pd.Series(data[:5]).astype(nullable_string_dtype)
+            expected = pd.Series(
+                [x.decode() for x in data[:5]], dtype=nullable_string_dtype
+            )
+            self.assert_series_equal(result, expected)
+        else:
+            super().test_astype_string(data, nullable_string_dtype)
+
 
 class TestConstructors(base.BaseConstructorsTests):
     def test_from_dtype(self, data, request):

diff --git a/pandas/tests/indexes/object/test_astype.py b/pandas/tests/indexes/object/test_astype.py
@@ -3,6 +3,7 @@
 from pandas import (
     Index,
     NaT,
+    Series,
 )
 import pandas._testing as tm
 
@@ -14,6 +15,12 @@ def test_astype_str_from_bytes():
     expected = Index(["あ", "a"], dtype="object")
     tm.assert_index_equal(result, expected)
 
+    # while we're here, check that Series.astype behaves the same
+
+    result = Series(idx).astype(str)
+    expected = Series(expected)
+    tm.assert_series_equal(result, expected)
+
 
 def test_astype_invalid_nas_to_tdt64_raises():
     # GH#45722 don't cast np.datetime64 NaTs to timedelta64 NaT

diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
@@ -378,7 +378,11 @@ def test_astype_unicode(self):
         former_encoding = None
 
         if sys.getdefaultencoding() == "utf-8":
-            test_series.append(Series(["野菜食べないとやばい".encode()]))
+            item = "野菜食べないとやばい"
+            ser = Series([item.encode()])
+            res = ser.astype("unicode")
+            expected = Series([item])
+            tm.assert_series_equal(res, expected)
 
         for ser in test_series:
             res = ser.astype("unicode")