Skip to content

BUG: astype(Int64) raises AttributeError #22869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,8 @@ Numeric
- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`)
- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`, :issue:`22163`)
- Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g. ``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`)
-
- Bug in :meth:`DataFrame.astype` where casting to an extension dtype could raise ``AttributeError`` (:issue:`22578`)


Strings
^^^^^^^
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
is_number,
is_integer, is_bool,
is_bool_dtype,
is_categorical_dtype,
is_numeric_dtype,
is_datetime64_any_dtype,
is_timedelta64_dtype,
Expand All @@ -28,6 +27,7 @@
is_re_compilable,
is_period_arraylike,
is_object_dtype,
is_extension_array_dtype,
pandas_dtype)
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.inference import is_hashable
Expand Down Expand Up @@ -5258,8 +5258,9 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
else:
results.append(results.append(col.copy() if copy else col))

elif is_categorical_dtype(dtype) and self.ndim > 1:
elif is_extension_array_dtype(dtype) and self.ndim > 1:
# GH 18099: columnwise conversion to categorical
# and extension dtype
results = (self[col].astype(dtype, copy=copy) for col in self)

else:
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,11 +675,11 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
if newb.shape != self.shape:
raise TypeError(
"cannot set astype for copy = [{copy}] for dtype "
"({dtype} [{itemsize}]) with smaller itemsize than "
"current ({newb_dtype} [{newb_size}])".format(
"({dtype} [{shape}]) to different shape "
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the error message so that building it no longer raises AttributeError.

"({newb_dtype} [{newb_shape}])".format(
copy=copy, dtype=self.dtype.name,
itemsize=self.itemsize, newb_dtype=newb.dtype.name,
newb_size=newb.itemsize))
shape=self.shape, newb_dtype=newb.dtype.name,
newb_shape=newb.shape))
return newb

def convert(self, copy=True, **kwargs):
Expand Down
43 changes: 43 additions & 0 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.compat import u
from pandas import _np_version_under1p14

from pandas.core.arrays import integer_array
from pandas.core.dtypes.dtypes import DatetimeTZDtype, CategoricalDtype
from pandas.tests.frame.common import TestData
from pandas.util.testing import (assert_series_equal,
Expand Down Expand Up @@ -666,6 +667,48 @@ def test_astype_categoricaldtype_class_raises(self, cls):
with tm.assert_raises_regex(TypeError, xpr):
df['A'].astype(cls)

@pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16'])
def test_astype_extension_dtypes(self, dtype):
    """Cast a two-column frame to an extension integer dtype (GH 22578).

    Covers a frame whose columns are all float64 and a frame in which one
    column has already been converted to the extension dtype.
    """
    def check_casts(frame, wanted):
        # Cast directly, then via an intermediate numpy int64 frame.
        tm.assert_frame_equal(frame.astype(dtype), wanted)
        tm.assert_frame_equal(frame.astype('int64').astype(dtype), wanted)

    all_extension = pd.DataFrame({
        'a': integer_array([1, 3, 5], dtype=dtype),
        'b': integer_array([2, 4, 6], dtype=dtype),
    })

    # Case 1: every column starts out as float64.
    floats = pd.DataFrame([[1., 2.], [3., 4.], [5., 6.]],
                          columns=['a', 'b'])
    check_casts(floats, all_extension)
    # Round trip back to float64 reproduces the starting frame.
    round_trip = floats.astype(dtype).astype('float64')
    tm.assert_frame_equal(round_trip, floats)

    # Case 2: only column 'b' is converted up front.
    mixed = pd.DataFrame([[1., 2.], [3., 4.], [5., 6.]],
                         columns=['a', 'b'])
    mixed['b'] = mixed['b'].astype(dtype)
    partly_extension = pd.DataFrame({
        'a': [1., 3., 5.],
        'b': integer_array([2, 4, 6], dtype=dtype),
    })
    tm.assert_frame_equal(mixed, partly_extension)

    check_casts(mixed, all_extension)

@pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16'])
def test_astype_extension_dtypes_1d(self, dtype):
    """Cast a single-column frame to an extension integer dtype (GH 22578).

    Covers a float64 column and a column that already holds the extension
    dtype before the frame-level cast.
    """
    def check_casts(frame, wanted):
        # Cast directly, then via an intermediate numpy int64 frame.
        tm.assert_frame_equal(frame.astype(dtype), wanted)
        tm.assert_frame_equal(frame.astype('int64').astype(dtype), wanted)

    wanted_frame = pd.DataFrame({'a': integer_array([1, 2, 3], dtype=dtype)})

    # Case 1: the column starts out as float64.
    floats = pd.DataFrame({'a': [1., 2., 3.]})
    check_casts(floats, wanted_frame)

    # Case 2: the column is converted before the frame-level cast.
    converted = pd.DataFrame({'a': [1., 2., 3.]})
    converted['a'] = converted['a'].astype(dtype)
    already_extension = pd.DataFrame(
        {'a': integer_array([1, 2, 3], dtype=dtype)})
    tm.assert_frame_equal(converted, already_extension)

    check_casts(converted, wanted_frame)

@pytest.mark.parametrize('dtype', [
{100: 'float64', 200: 'uint64'}, 'category', 'float64'])
def test_astype_column_metadata(self, dtype):
Expand Down