From 9f7473791a0cdf9a3b9b4bb6d4d3e9dddeb69f2d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 21 Jan 2019 13:18:49 -0600 Subject: [PATCH 1/3] BUG: Ensure .astype doesn't use PandasArray On 0.24.0rc1, it's possible to end up with a PandasArray in internals. ```python In [8]: ser = pd.Series([1, 2]) In [9]: ser.astype(ser.array.dtype)._data.blocks[0] Out[9]: ExtensionBlock: 2 dtype: int64 ``` --- pandas/core/internals/blocks.py | 8 +++++--- pandas/tests/series/test_internals.py | 5 +++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index df764aa4ba666..40e6b4592cbd8 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -35,7 +35,7 @@ import pandas.core.algorithms as algos from pandas.core.arrays import ( - Categorical, DatetimeArray, ExtensionArray, TimedeltaArray) + Categorical, DatetimeArray, ExtensionArray, PandasDtype, TimedeltaArray) from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.indexes.datetimes import DatetimeIndex @@ -530,6 +530,10 @@ def f(m, v, i): return self.split_and_operate(None, f, False) def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): + dtype = pandas_dtype(dtype) + if isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + return self._astype(dtype, copy=copy, errors=errors, values=values, **kwargs) @@ -591,8 +595,6 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, return self.make_block(Categorical(self.values, dtype=dtype)) - # convert dtypes if needed - dtype = pandas_dtype(dtype) # astype processing if is_dtype_equal(self.dtype, dtype): if copy: diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 26b868872ee0d..e994e04266e26 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -315,6 +315,11 @@ def test_constructor_no_pandas_array(self): tm.assert_series_equal(ser, result) assert isinstance(result._data.blocks[0], IntBlock) + def test_astype_no_pandas_dtype(self): + ser = pd.Series([1, 2], dtype="int64") + result = ser.astype(ser.array.dtype) + tm.assert_series_equal(result, ser) + def test_from_array(self): result = pd.Series(pd.array(['1H', '2H'], dtype='timedelta64[ns]')) assert result._data.blocks[0].is_extension is False From 548377fa6029ae5a2fe17f3c21356c0cc9959c51 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 22 Jan 2019 08:41:21 -0600 Subject: [PATCH 2/3] simplify, move --- pandas/core/internals/blocks.py | 19 +++++-------------- pandas/tests/series/test_internals.py | 3 +++ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 40e6b4592cbd8..6a5221e934c3d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -530,10 +530,6 @@ def f(m, v, i): return self.split_and_operate(None, f, False) def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): - dtype = pandas_dtype(dtype) - if isinstance(dtype, PandasDtype): - dtype = dtype.numpy_dtype - return self._astype(dtype, copy=copy, errors=errors, values=values, **kwargs) @@ -595,21 +591,16 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, return self.make_block(Categorical(self.values, dtype=dtype)) + dtype = pandas_dtype(dtype) + if isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + # astype processing if is_dtype_equal(self.dtype, dtype): if copy: return self.copy() return self - klass = None - if is_sparse(self.values): - # special case sparse, Series[Sparse].astype(object) is sparse - klass = ExtensionBlock - elif is_object_dtype(dtype): - klass = ObjectBlock - elif is_extension_array_dtype(dtype): - klass = ExtensionBlock - try: # force the copy here if values is None: @@ -642,7 +633,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, pass newb = make_block(values, placement=self.mgr_locs, - klass=klass, ndim=self.ndim) + ndim=self.ndim) except Exception: # noqa: E722 if errors == 'raise': raise diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index e994e04266e26..afde41d95e64f 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -316,7 +316,10 @@ def test_constructor_no_pandas_array(self): assert isinstance(result._data.blocks[0], IntBlock) def test_astype_no_pandas_dtype(self): + # https://github.com/pandas-dev/pandas/pull/24866 ser = pd.Series([1, 2], dtype="int64") + # Don't have PandasDtype in the public API, so we use `.array.dtype`, + # which is a PandasDtype. result = ser.astype(ser.array.dtype) tm.assert_series_equal(result, ser) From d6de645fcc6c47a9f74597db5fe0a9f39d308a4b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 22 Jan 2019 09:22:05 -0600 Subject: [PATCH 3/3] Move to make_block --- pandas/core/internals/blocks.py | 11 ++++++++--- pandas/tests/internals/test_internals.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6a5221e934c3d..9275367cc347a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -29,7 +29,7 @@ CategoricalDtype, ExtensionDtype, PandasExtensionDtype) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass, - ABCSeries) + ABCPandasArray, ABCSeries) from pandas.core.dtypes.missing import ( _isna_compat, array_equivalent, isna, notna) @@ -592,8 +592,6 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, return self.make_block(Categorical(self.values, dtype=dtype)) dtype = pandas_dtype(dtype) - if isinstance(dtype, PandasDtype): - dtype = dtype.numpy_dtype # astype processing if is_dtype_equal(self.dtype, dtype): @@ -3072,6 +3070,13 @@ def get_block_type(values, dtype=None): def make_block(values, placement, klass=None, ndim=None, dtype=None, fastpath=None): + # Ensure that we don't allow PandasArray / PandasDtype in internals. + # For now, blocks should be backed by ndarrays when possible. + if isinstance(values, ABCPandasArray): + values = values.to_numpy() + if isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + if fastpath is not None: # GH#19265 pyarrow is passing this warnings.warn("fastpath argument is deprecated, will be removed " diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index fe0706efdc4f8..9669076732470 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1294,3 +1294,23 @@ def test_block_shape(): assert (a._data.blocks[0].mgr_locs.indexer == b._data.blocks[0].mgr_locs.indexer) + + +def test_make_block_no_pandas_array(): + # https://github.com/pandas-dev/pandas/pull/24866 + arr = pd.array([1, 2]) + + # PandasArray, no dtype + result = make_block(arr, slice(len(arr))) + assert result.is_integer is True + assert result.is_extension is False + + # PandasArray, PandasDtype + result = make_block(arr, slice(len(arr)), dtype=arr.dtype) + assert result.is_integer is True + assert result.is_extension is False + + # ndarray, PandasDtype + result = make_block(arr.to_numpy(), slice(len(arr)), dtype=arr.dtype) + assert result.is_integer is True + assert result.is_extension is False