Skip to content

BUG: Ensure .astype doesn't use PandasArray #24866

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 11 additions & 13 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@
CategoricalDtype, ExtensionDtype, PandasExtensionDtype)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass,
ABCSeries)
ABCPandasArray, ABCSeries)
from pandas.core.dtypes.missing import (
_isna_compat, array_equivalent, isna, notna)

import pandas.core.algorithms as algos
from pandas.core.arrays import (
Categorical, DatetimeArray, ExtensionArray, TimedeltaArray)
Categorical, DatetimeArray, ExtensionArray, PandasDtype, TimedeltaArray)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.indexes.datetimes import DatetimeIndex
Expand Down Expand Up @@ -576,23 +576,14 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,

return self.make_block(Categorical(self.values, dtype=dtype))

# convert dtypes if needed
dtype = pandas_dtype(dtype)

# astype processing
if is_dtype_equal(self.dtype, dtype):
if copy:
return self.copy()
return self

klass = None
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is unnecessary now: `.astype` should already give us an array of the correct type, and `make_block` will then pick the correct block class for that array.

if is_sparse(self.values):
# special case sparse, Series[Sparse].astype(object) is sparse
klass = ExtensionBlock
elif is_object_dtype(dtype):
klass = ObjectBlock
elif is_extension_array_dtype(dtype):
klass = ExtensionBlock

try:
# force the copy here
if values is None:
Expand Down Expand Up @@ -624,7 +615,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
pass

newb = make_block(values, placement=self.mgr_locs,
klass=klass, ndim=self.ndim)
ndim=self.ndim)
except Exception: # noqa: E722
if errors == 'raise':
raise
Expand Down Expand Up @@ -3041,6 +3032,13 @@ def get_block_type(values, dtype=None):

def make_block(values, placement, klass=None, ndim=None, dtype=None,
fastpath=None):
# Ensure that we don't allow PandasArray / PandasDtype in internals.
# For now, blocks should be backed by ndarrays when possible.
if isinstance(values, ABCPandasArray):
values = values.to_numpy()
if isinstance(dtype, PandasDtype):
dtype = dtype.numpy_dtype

if fastpath is not None:
# GH#19265 pyarrow is passing this
warnings.warn("fastpath argument is deprecated, will be removed "
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1291,3 +1291,23 @@ def test_block_shape():

assert (a._data.blocks[0].mgr_locs.indexer ==
b._data.blocks[0].mgr_locs.indexer)


def test_make_block_no_pandas_array():
    # Regression test for
    # https://github.com/pandas-dev/pandas/pull/24866
    # Every combination below must yield an integer block that is not an
    # extension block.
    def check_integer_non_extension(block):
        assert block.is_integer is True
        assert block.is_extension is False

    boxed = pd.array([1, 2])

    # values: PandasArray, dtype: not given
    check_integer_non_extension(make_block(boxed, slice(len(boxed))))

    # values: PandasArray, dtype: PandasDtype
    check_integer_non_extension(
        make_block(boxed, slice(len(boxed)), dtype=boxed.dtype))

    # values: ndarray, dtype: PandasDtype
    check_integer_non_extension(
        make_block(boxed.to_numpy(), slice(len(boxed)), dtype=boxed.dtype))
8 changes: 8 additions & 0 deletions pandas/tests/series/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,14 @@ def test_constructor_no_pandas_array(self):
tm.assert_series_equal(ser, result)
assert isinstance(result._data.blocks[0], IntBlock)

def test_astype_no_pandas_dtype(self):
    # Regression test for
    # https://github.com/pandas-dev/pandas/pull/24866
    original = pd.Series([1, 2], dtype="int64")
    # PandasDtype is not part of the public API, so obtain one through
    # `.array.dtype` of an existing Series.
    wrapped_dtype = original.array.dtype
    converted = original.astype(wrapped_dtype)
    # Casting to the wrapped dtype must give back an equal Series.
    tm.assert_series_equal(converted, original)

def test_from_array(self):
result = pd.Series(pd.array(['1H', '2H'], dtype='timedelta64[ns]'))
assert result._data.blocks[0].is_extension is False
Expand Down