Skip to content

Commit 51980fe

Browse files
TomAugspurger authored and jreback committed
BUG: Ensure .astype doesn't use PandasArray (pandas-dev#24866)
1 parent f572ec4 commit 51980fe

File tree

3 files changed

+39
-13
lines changed

3 files changed

+39
-13
lines changed

pandas/core/internals/blocks.py

+11 -13
Original file line number | Diff line number | Diff line change
@@ -27,13 +27,13 @@
2727
CategoricalDtype, ExtensionDtype, PandasExtensionDtype)
2828
from pandas.core.dtypes.generic import (
2929
ABCDataFrame, ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass,
30-
ABCSeries)
30+
ABCPandasArray, ABCSeries)
3131
from pandas.core.dtypes.missing import (
3232
_isna_compat, array_equivalent, isna, notna)
3333

3434
import pandas.core.algorithms as algos
3535
from pandas.core.arrays import (
36-
Categorical, DatetimeArray, ExtensionArray, TimedeltaArray)
36+
Categorical, DatetimeArray, ExtensionArray, PandasDtype, TimedeltaArray)
3737
from pandas.core.base import PandasObject
3838
import pandas.core.common as com
3939
from pandas.core.indexes.datetimes import DatetimeIndex
@@ -576,23 +576,14 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
576576

577577
return self.make_block(Categorical(self.values, dtype=dtype))
578578

579-
# convert dtypes if needed
580579
dtype = pandas_dtype(dtype)
580+
581581
# astype processing
582582
if is_dtype_equal(self.dtype, dtype):
583583
if copy:
584584
return self.copy()
585585
return self
586586

587-
klass = None
588-
if is_sparse(self.values):
589-
# special case sparse, Series[Sparse].astype(object) is sparse
590-
klass = ExtensionBlock
591-
elif is_object_dtype(dtype):
592-
klass = ObjectBlock
593-
elif is_extension_array_dtype(dtype):
594-
klass = ExtensionBlock
595-
596587
try:
597588
# force the copy here
598589
if values is None:
@@ -624,7 +615,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
624615
pass
625616

626617
newb = make_block(values, placement=self.mgr_locs,
627-
klass=klass, ndim=self.ndim)
618+
ndim=self.ndim)
628619
except Exception: # noqa: E722
629620
if errors == 'raise':
630621
raise
@@ -3041,6 +3032,13 @@ def get_block_type(values, dtype=None):
30413032

30423033
def make_block(values, placement, klass=None, ndim=None, dtype=None,
30433034
fastpath=None):
3035+
# Ensure that we don't allow PandasArray / PandasDtype in internals.
3036+
# For now, blocks should be backed by ndarrays when possible.
3037+
if isinstance(values, ABCPandasArray):
3038+
values = values.to_numpy()
3039+
if isinstance(dtype, PandasDtype):
3040+
dtype = dtype.numpy_dtype
3041+
30443042
if fastpath is not None:
30453043
# GH#19265 pyarrow is passing this
30463044
warnings.warn("fastpath argument is deprecated, will be removed "

pandas/tests/internals/test_internals.py

+20
Original file line number | Diff line number | Diff line change
@@ -1291,3 +1291,23 @@ def test_block_shape():
12911291

12921292
assert (a._data.blocks[0].mgr_locs.indexer ==
12931293
b._data.blocks[0].mgr_locs.indexer)
1294+
1295+
1296+
def test_make_block_no_pandas_array():
1297+
# https://github.com/pandas-dev/pandas/pull/24866
1298+
arr = pd.array([1, 2])
1299+
1300+
# PandasArray, no dtype
1301+
result = make_block(arr, slice(len(arr)))
1302+
assert result.is_integer is True
1303+
assert result.is_extension is False
1304+
1305+
# PandasArray, PandasDtype
1306+
result = make_block(arr, slice(len(arr)), dtype=arr.dtype)
1307+
assert result.is_integer is True
1308+
assert result.is_extension is False
1309+
1310+
# ndarray, PandasDtype
1311+
result = make_block(arr.to_numpy(), slice(len(arr)), dtype=arr.dtype)
1312+
assert result.is_integer is True
1313+
assert result.is_extension is False

pandas/tests/series/test_internals.py

+8
Original file line number | Diff line number | Diff line change
@@ -312,6 +312,14 @@ def test_constructor_no_pandas_array(self):
312312
tm.assert_series_equal(ser, result)
313313
assert isinstance(result._data.blocks[0], IntBlock)
314314

315+
def test_astype_no_pandas_dtype(self):
316+
# https://github.com/pandas-dev/pandas/pull/24866
317+
ser = pd.Series([1, 2], dtype="int64")
318+
# Don't have PandasDtype in the public API, so we use `.array.dtype`,
319+
# which is a PandasDtype.
320+
result = ser.astype(ser.array.dtype)
321+
tm.assert_series_equal(result, ser)
322+
315323
def test_from_array(self):
316324
result = pd.Series(pd.array(['1H', '2H'], dtype='timedelta64[ns]'))
317325
assert result._data.blocks[0].is_extension is False

0 commit comments

Comments
 (0)