diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 388c5dbf6a7ee..3b7de3d2f1f3e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -234,7 +234,7 @@ Other API changes ^^^^^^^^^^^^^^^^^ - Partially initialized :class:`CategoricalDtype` (i.e. those with ``categories=None`` objects will no longer compare as equal to fully initialized dtype objects. - Accessing ``_constructor_expanddim`` on a :class:`DataFrame` and ``_constructor_sliced`` on a :class:`Series` now raise an ``AttributeError``. Previously a ``NotImplementedError`` was raised (:issue:`38782`) -- +- :meth:`DataFrame.astype` and :meth:`Series.astype` for ``bytes`` no longer cast to numpy string dtypes but instead cast to ``object`` dtype (:issue:`39566`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 669bfe08d42b0..3b0542dcf30ac 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1268,7 +1268,7 @@ def soft_convert_objects( values = lib.maybe_convert_objects( values, convert_datetime=datetime, convert_timedelta=timedelta ) - except (OutOfBoundsDatetime, ValueError): + except OutOfBoundsDatetime: return values if numeric and is_object_dtype(values.dtype): diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 0e52ebf69137c..69642dc2398fd 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -287,7 +287,9 @@ def apply( if not ignore_failures: raise continue - # if not isinstance(applied, ExtensionArray): + if not isinstance(applied, ExtensionArray): + if issubclass(applied.dtype.type, (str, bytes)): + applied = np.array(applied, dtype=object) # # TODO not all EA operations return new EAs (eg astype) # applied = array(applied) result_arrays.append(applied) @@ -413,7 +415,10 @@ def downcast(self) -> ArrayManager: return
self.apply_with_block("downcast") def astype(self, dtype, copy: bool = False, errors: str = "raise") -> ArrayManager: - return self.apply("astype", dtype=dtype, copy=copy) # , errors=errors) + # if issubclass(dtype, (str, bytes)): + # dtype = "object" + y = self.apply("astype", dtype=dtype, copy=copy) # , errors=errors) + return y def convert( self, diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1377928f71915..5d36724899831 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2219,7 +2219,7 @@ class ObjectBlock(Block): @classmethod def _maybe_coerce_values(cls, values): - if issubclass(values.dtype.type, str): + if issubclass(values.dtype.type, (str, bytes)): values = np.array(values, dtype=object) return values diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 35e958ff3a2b1..8031f03901b8b 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -644,7 +644,8 @@ def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture, request) alt = obj.astype(str) assert np.all(alt.iloc[1:] == result.iloc[1:]) - def test_astype_bytes(self): - # GH#39474 - result = DataFrame(["foo", "bar", "baz"]).astype(bytes) - assert result.dtypes[0] == np.dtype("S3") + @pytest.mark.parametrize("dtype", [bytes, np.string_, np.bytes_]) + def test_astype_bytes(self, dtype): + # GH#39566 + result = DataFrame(["foo", "bar", "baz"]).astype(dtype) + assert result.dtypes[0] == "object" diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index a3785518c860d..df86ef3a5ab8b 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -344,10 +344,11 @@ def test_astype_unicode(self): reload(sys) sys.setdefaultencoding(former_encoding) - def test_astype_bytes(self): + @pytest.mark.parametrize("dtype", [bytes, np.string_, 
np.bytes_]) + def test_astype_bytes(self, dtype): # GH#39474 - result = Series(["foo", "bar", "baz"]).astype(bytes) - assert result.dtypes == np.dtype("S3") + result = Series(["foo", "bar", "baz"]).astype(dtype) + assert result.dtypes == "object" class TestAstypeCategorical: