Skip to content

Commit db399c2

Browse files
sinhrks authored and jreback committed
BUG: astype(Int64) raises AttributeError (#22869)
1 parent 1a61e26 commit db399c2

File tree

4 files changed

+52
-7
lines changed

4 files changed

+52
-7
lines changed

doc/source/whatsnew/v0.24.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,8 @@ Numeric
784784
- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`)
785785
- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`, :issue:`22163`)
786786
- Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g. ``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`)
787-
-
787+
- Bug in :meth:`DataFrame.astype` where casting to an extension dtype could raise ``AttributeError`` (:issue:`22578`)
788+
788789

789790
Strings
790791
^^^^^^^

pandas/core/generic.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
is_number,
1919
is_integer, is_bool,
2020
is_bool_dtype,
21-
is_categorical_dtype,
2221
is_numeric_dtype,
2322
is_datetime64_any_dtype,
2423
is_timedelta64_dtype,
@@ -28,6 +27,7 @@
2827
is_re_compilable,
2928
is_period_arraylike,
3029
is_object_dtype,
30+
is_extension_array_dtype,
3131
pandas_dtype)
3232
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
3333
from pandas.core.dtypes.inference import is_hashable
@@ -5258,8 +5258,9 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
52585258
else:
52595259
results.append(results.append(col.copy() if copy else col))
52605260

5261-
elif is_categorical_dtype(dtype) and self.ndim > 1:
5261+
elif is_extension_array_dtype(dtype) and self.ndim > 1:
52625262
# GH 18099: columnwise conversion to categorical
5263+
# and extension dtype
52635264
results = (self[col].astype(dtype, copy=copy) for col in self)
52645265

52655266
else:

pandas/core/internals/blocks.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -675,11 +675,11 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
675675
if newb.shape != self.shape:
676676
raise TypeError(
677677
"cannot set astype for copy = [{copy}] for dtype "
678-
"({dtype} [{itemsize}]) with smaller itemsize than "
679-
"current ({newb_dtype} [{newb_size}])".format(
678+
"({dtype} [{shape}]) to different shape "
679+
"({newb_dtype} [{newb_shape}])".format(
680680
copy=copy, dtype=self.dtype.name,
681-
itemsize=self.itemsize, newb_dtype=newb.dtype.name,
682-
newb_size=newb.itemsize))
681+
shape=self.shape, newb_dtype=newb.dtype.name,
682+
newb_shape=newb.shape))
683683
return newb
684684

685685
def convert(self, copy=True, **kwargs):

pandas/tests/frame/test_dtypes.py

+43
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.compat import u
1313
from pandas import _np_version_under1p14
1414

15+
from pandas.core.arrays import integer_array
1516
from pandas.core.dtypes.dtypes import DatetimeTZDtype, CategoricalDtype
1617
from pandas.tests.frame.common import TestData
1718
from pandas.util.testing import (assert_series_equal,
@@ -666,6 +667,48 @@ def test_astype_categoricaldtype_class_raises(self, cls):
666667
with tm.assert_raises_regex(TypeError, xpr):
667668
df['A'].astype(cls)
668669

670+
@pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16'])
671+
def test_astype_extension_dtypes(self, dtype):
672+
# GH 22578
673+
df = pd.DataFrame([[1., 2.], [3., 4.], [5., 6.]], columns=['a', 'b'])
674+
675+
expected1 = pd.DataFrame({'a': integer_array([1, 3, 5],
676+
dtype=dtype),
677+
'b': integer_array([2, 4, 6],
678+
dtype=dtype)})
679+
tm.assert_frame_equal(df.astype(dtype), expected1)
680+
tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1)
681+
tm.assert_frame_equal(df.astype(dtype).astype('float64'), df)
682+
683+
df = pd.DataFrame([[1., 2.], [3., 4.], [5., 6.]], columns=['a', 'b'])
684+
df['b'] = df['b'].astype(dtype)
685+
expected2 = pd.DataFrame({'a': [1., 3., 5.],
686+
'b': integer_array([2, 4, 6],
687+
dtype=dtype)})
688+
tm.assert_frame_equal(df, expected2)
689+
690+
tm.assert_frame_equal(df.astype(dtype), expected1)
691+
tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1)
692+
693+
@pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16'])
694+
def test_astype_extension_dtypes_1d(self, dtype):
695+
# GH 22578
696+
df = pd.DataFrame({'a': [1., 2., 3.]})
697+
698+
expected1 = pd.DataFrame({'a': integer_array([1, 2, 3],
699+
dtype=dtype)})
700+
tm.assert_frame_equal(df.astype(dtype), expected1)
701+
tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1)
702+
703+
df = pd.DataFrame({'a': [1., 2., 3.]})
704+
df['a'] = df['a'].astype(dtype)
705+
expected2 = pd.DataFrame({'a': integer_array([1, 2, 3],
706+
dtype=dtype)})
707+
tm.assert_frame_equal(df, expected2)
708+
709+
tm.assert_frame_equal(df.astype(dtype), expected1)
710+
tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1)
711+
669712
@pytest.mark.parametrize('dtype', [
670713
{100: 'float64', 200: 'uint64'}, 'category', 'float64'])
671714
def test_astype_column_metadata(self, dtype):

0 commit comments

Comments
 (0)