diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py
index faee9bc57464b..232aabfb87c58 100644
--- a/asv_bench/benchmarks/reshape.py
+++ b/asv_bench/benchmarks/reshape.py
@@ -53,6 +53,42 @@ def time_unstack(self):
         self.df.unstack(1)


+class ReshapeExtensionDtype:
+
+    params = ["datetime64[ns, US/Pacific]", "Period[s]"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        lev = pd.Index(list("ABCDEFGHIJ"))
+        ri = pd.Index(range(1000))
+        mi = MultiIndex.from_product([lev, ri], names=["foo", "bar"])
+
+        index = date_range("2016-01-01", periods=10000, freq="s", tz="US/Pacific")
+        if dtype == "Period[s]":
+            index = index.tz_localize(None).to_period("s")
+
+        ser = pd.Series(index, index=mi)
+        df = ser.unstack("bar")
+        # roundtrips -> df.stack().equals(ser)
+
+        self.ser = ser
+        self.df = df
+
+    def time_stack(self, dtype):
+        self.df.stack()
+
+    def time_unstack_fast(self, dtype):
+        # last level -> doesnt have to make copies
+        self.ser.unstack("bar")
+
+    def time_unstack_slow(self, dtype):
+        # first level -> must make copies
+        self.ser.unstack("foo")
+
+    def time_transpose(self, dtype):
+        self.df.T
+
+
 class Unstack:

     params = ["int", "category"]
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index b95a3455ca905..af5c92ce82a66 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -81,10 +81,7 @@
     validate_func_kwargs,
 )
 from pandas.core.apply import GroupByApply
-from pandas.core.arrays import (
-    Categorical,
-    ExtensionArray,
-)
+from pandas.core.arrays import Categorical
 from pandas.core.base import (
     DataError,
     SpecificationError,
@@ -1123,8 +1120,7 @@ def py_fallback(values: ArrayLike) -> ArrayLike:
             obj: FrameOrSeriesUnion

             # call our grouper again with only this block
-            if isinstance(values, ExtensionArray) or values.ndim == 1:
-                # TODO(EA2D): special case not needed with 2D EAs
+            if values.ndim == 1:
                 obj = Series(values)
             else:
                 # TODO special case not needed with ArrayManager
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 97a2d4037bf26..906c95c825cab 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -85,7 +85,10 @@
     DataManager,
     SingleDataManager,
 )
-from pandas.core.internals.blocks import new_block
+from pandas.core.internals.blocks import (
+    ensure_block_shape,
+    new_block,
+)

 if TYPE_CHECKING:
     from pandas import Float64Index
@@ -497,10 +500,7 @@ def quantile(
         interpolation="linear",
     ) -> ArrayManager:

-        arrs = [
-            x if not isinstance(x, np.ndarray) else np.atleast_2d(x)
-            for x in self.arrays
-        ]
+        arrs = [ensure_block_shape(x, 2) for x in self.arrays]
         assert axis == 1
         new_arrs = [quantile_compat(x, qs, interpolation, axis=axis) for x in arrs]
         for i, arr in enumerate(new_arrs):
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index e084db77692f5..ed4eca67491d8 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -118,6 +118,9 @@
     from pandas.core.arrays._mixins import NDArrayBackedExtensionArray


+_dtype_obj = np.dtype(object)  # comparison is faster than is_object_dtype
+
+
 class Block(PandasObject):
     """
     Canonical n-dimensional unit of homogeneous dtype contained in a pandas
@@ -278,8 +281,8 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
         return an internal format, currently just the ndarray
         this is often overridden to handle to_dense like operations
         """
-        if is_object_dtype(dtype):
-            return self.values.astype(object)
+        if dtype == _dtype_obj:
+            return self.values.astype(_dtype_obj)
         return self.values

     @final
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 0803e40a219be..924d2a77e5da5 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -42,7 +42,10 @@
     ExtensionArray,
 )
 from pandas.core.internals.array_manager import ArrayManager
-from pandas.core.internals.blocks import new_block
+from pandas.core.internals.blocks import (
+    ensure_block_shape,
+    new_block,
+)
 from pandas.core.internals.managers import BlockManager

 if TYPE_CHECKING:
@@ -420,12 +423,8 @@ def _concatenate_join_units(
         # the non-EA values are 2D arrays with shape (1, n)
         to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat]
         concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
-        if not is_extension_array_dtype(concat_values.dtype):
-            # if the result of concat is not an EA but an ndarray, reshape to
-            # 2D to put it a non-EA Block
-            # special case DatetimeArray/TimedeltaArray, which *is* an EA, but
-            # is put in a consolidated 2D block
-            concat_values = np.atleast_2d(concat_values)
+        concat_values = ensure_block_shape(concat_values, 2)
+
     else:
         concat_values = concat_compat(to_concat, axis=concat_axis)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index b656c9e83e1a8..8b08a5fd70537 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1226,7 +1226,7 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
         if value.ndim == 2:
             value = value.T

-        elif value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype):
+        elif value.ndim == self.ndim - 1:
             # TODO(EA2D): special case not needed with 2D EAs
             value = ensure_block_shape(value, ndim=2)

diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py
index 602c4bfd740ca..dbd309f0836a5 100644
--- a/pandas/core/internals/ops.py
+++ b/pandas/core/internals/ops.py
@@ -8,8 +8,6 @@
     Tuple,
 )

-import numpy as np
-
 from pandas._typing import ArrayLike

 if TYPE_CHECKING:
@@ -32,7 +30,7 @@ def _iter_block_pairs(
         locs = blk.mgr_locs
         blk_vals = blk.values

-        left_ea = not isinstance(blk_vals, np.ndarray)
+        left_ea = blk_vals.ndim == 1

         rblks = right._slice_take_blocks_ax0(locs.indexer, only_slice=True)

@@ -43,7 +41,7 @@ def _iter_block_pairs(
         # assert rblks[0].shape[0] == 1, rblks[0].shape

         for k, rblk in enumerate(rblks):
-            right_ea = not isinstance(rblk.values, np.ndarray)
+            right_ea = rblk.values.ndim == 1

             lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea)
             info = BlockPairInfo(lvals, rvals, locs, left_ea, right_ea, rblk)
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index d5d04ea2b1539..2592492f1c14c 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1741,7 +1741,7 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
         # restore NaT elements
         y[mask] = iNaT  # TODO: could try/finally for this?

-        if isinstance(values, np.ndarray):
+        if isinstance(values.dtype, np.dtype):
             result = result.view(orig_dtype)
         else:
             # DatetimeArray/TimedeltaArray
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 271bb2ca8dd75..20dd30022c3d9 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -440,7 +440,7 @@ def unstack(obj, level, fill_value=None):
             obj.index, level=level, constructor=obj._constructor_expanddim
         )
         return unstacker.get_result(
-            obj.values, value_columns=None, fill_value=fill_value
+            obj._values, value_columns=None, fill_value=fill_value
         )
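
Note (illustrative, not part of the patch): the "# roundtrips -> df.stack().equals(ser)" comment in the new
ReshapeExtensionDtype.setup above can be checked with public API only. The sketch below is a scaled-down
version of that setup (2 x 3 instead of 10 x 1000) and assumes a pandas build that includes these reshape
changes, so the extension dtype survives the unstack/stack roundtrip.

    import pandas as pd

    mi = pd.MultiIndex.from_product([list("AB"), range(3)], names=["foo", "bar"])
    ser = pd.Series(
        pd.date_range("2016-01-01", periods=6, freq="s", tz="US/Pacific"),  # tz-aware, as in the benchmark
        index=mi,
    )

    df = ser.unstack("bar")        # wide frame; each column keeps the datetime64[ns, US/Pacific] dtype
    assert df.stack().equals(ser)  # the roundtrip noted in ReshapeExtensionDtype.setup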