From ea52857b11f4101af270037a45e7d01ce042ebdc Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 30 Jan 2021 19:28:04 +0100 Subject: [PATCH] BUG: Regression in astype not casting to bytes --- doc/source/whatsnew/v1.2.2.rst | 1 + pandas/core/dtypes/cast.py | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/tests/frame/methods/test_astype.py | 5 +++++ pandas/tests/series/methods/test_astype.py | 5 +++++ 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index c5825d0881515..56b3f7f4f0020 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -15,6 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :class:`DataFrame` constructor reordering element when construction from datetime ndarray with dtype not ``"datetime64[ns]"`` (:issue:`39422`) +- Fixed regression in :class:`DataFrame.astype` and :class:`Series.astype` not casting to bytes dtype (:issue:`39474`) - Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`) - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`) - Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0be3970159fbd..af793478410e8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1221,7 +1221,7 @@ def soft_convert_objects( values = lib.maybe_convert_objects( values, convert_datetime=datetime, convert_timedelta=timedelta ) - except OutOfBoundsDatetime: + except (OutOfBoundsDatetime, ValueError): return values if numeric and is_object_dtype(values.dtype): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index eb8bb0fe90e9a..d2b51d28b57f8 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2247,7 +2247,7 @@ class ObjectBlock(Block): _can_hold_na = True def _maybe_coerce_values(self, values): - if issubclass(values.dtype.type, (str, bytes)): + if issubclass(values.dtype.type, str): values = np.array(values, dtype=object) return values diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index df98c78e78fb6..46f5a20f38941 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -651,3 +651,8 @@ def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture, request) # For non-NA values, we should match what we get for non-EA str alt = obj.astype(str) assert np.all(alt.iloc[1:] == result.iloc[1:]) + + def test_astype_bytes(self): + # GH#39474 + result = DataFrame(["foo", "bar", "baz"]).astype(bytes) + assert result.dtypes[0] == np.dtype("S3") diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 1a141e3201d57..d683503f22f28 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -341,6 +341,11 @@ def test_astype_unicode(self): reload(sys) sys.setdefaultencoding(former_encoding) + def test_astype_bytes(self): + # GH#39474 + result = Series(["foo", "bar", "baz"]).astype(bytes) + assert result.dtypes == np.dtype("S3") + class TestAstypeCategorical: def test_astype_categorical_invalid_conversions(self):