diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a4209ba90aaee..40dd48880e0eb 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -784,7 +784,8 @@ Numeric - Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`) - Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`, :issue:`22163`) - Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g. ``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`) -- +- Bug in :meth:`DataFrame.astype` where converting to an extension dtype could raise ``AttributeError`` (:issue:`22578`) + Strings ^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0b5a10283946c..6350e59f1ccc0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -18,7 +18,6 @@ is_number, is_integer, is_bool, is_bool_dtype, - is_categorical_dtype, is_numeric_dtype, is_datetime64_any_dtype, is_timedelta64_dtype, @@ -28,6 +27,7 @@ is_re_compilable, is_period_arraylike, is_object_dtype, + is_extension_array_dtype, pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.inference import is_hashable @@ -5258,8 +5258,9 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): else: results.append(results.append(col.copy() if copy else col)) - elif is_categorical_dtype(dtype) and self.ndim > 1: + elif is_extension_array_dtype(dtype) and self.ndim > 1: # GH 18099: columnwise conversion to categorical + # and extension dtype results = (self[col].astype(dtype, copy=copy) for col in self) else: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0e57dd33b1c4e..93930fd844b95 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -675,11 +675,11 @@ 
def _astype(self, dtype, copy=False, errors='raise', values=None, if newb.shape != self.shape: raise TypeError( "cannot set astype for copy = [{copy}] for dtype " - "({dtype} [{itemsize}]) with smaller itemsize than " - "current ({newb_dtype} [{newb_size}])".format( + "({dtype} [{shape}]) to different shape " + "({newb_dtype} [{newb_shape}])".format( copy=copy, dtype=self.dtype.name, - itemsize=self.itemsize, newb_dtype=newb.dtype.name, - newb_size=newb.itemsize)) + shape=self.shape, newb_dtype=newb.dtype.name, + newb_shape=newb.shape)) return newb def convert(self, copy=True, **kwargs): diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index c91370dc36770..2afaeea3755d0 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -12,6 +12,7 @@ from pandas.compat import u from pandas import _np_version_under1p14 +from pandas.core.arrays import integer_array from pandas.core.dtypes.dtypes import DatetimeTZDtype, CategoricalDtype from pandas.tests.frame.common import TestData from pandas.util.testing import (assert_series_equal, @@ -666,6 +667,48 @@ def test_astype_categoricaldtype_class_raises(self, cls): with tm.assert_raises_regex(TypeError, xpr): df['A'].astype(cls) + @pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16']) + def test_astype_extension_dtypes(self, dtype): + # GH 22578 + df = pd.DataFrame([[1., 2.], [3., 4.], [5., 6.]], columns=['a', 'b']) + + expected1 = pd.DataFrame({'a': integer_array([1, 3, 5], + dtype=dtype), + 'b': integer_array([2, 4, 6], + dtype=dtype)}) + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1) + tm.assert_frame_equal(df.astype(dtype).astype('float64'), df) + + df = pd.DataFrame([[1., 2.], [3., 4.], [5., 6.]], columns=['a', 'b']) + df['b'] = df['b'].astype(dtype) + expected2 = pd.DataFrame({'a': [1., 3., 5.], + 'b': integer_array([2, 4, 6], + dtype=dtype)}) + tm.assert_frame_equal(df, 
expected2) + + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1) + + @pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16']) + def test_astype_extension_dtypes_1d(self, dtype): + # GH 22578 + df = pd.DataFrame({'a': [1., 2., 3.]}) + + expected1 = pd.DataFrame({'a': integer_array([1, 2, 3], + dtype=dtype)}) + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1) + + df = pd.DataFrame({'a': [1., 2., 3.]}) + df['a'] = df['a'].astype(dtype) + expected2 = pd.DataFrame({'a': integer_array([1, 2, 3], + dtype=dtype)}) + tm.assert_frame_equal(df, expected2) + + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1) + @pytest.mark.parametrize('dtype', [ {100: 'float64', 200: 'uint64'}, 'category', 'float64']) def test_astype_column_metadata(self, dtype):