diff --git a/pandas/core/internals/api.py b/pandas/core/internals/api.py
index be0828f5303b8..26d0242d81cf2 100644
--- a/pandas/core/internals/api.py
+++ b/pandas/core/internals/api.py
@@ -22,6 +22,7 @@
     check_ndim,
     extract_pandas_array,
     get_block_type,
+    maybe_coerce_values,
 )
 
 
@@ -58,6 +59,7 @@ def make_block(
 
     ndim = _maybe_infer_ndim(values, placement, ndim)
     check_ndim(values, placement, ndim)
+    values = maybe_coerce_values(values)
     return klass(values, ndim=ndim, placement=placement)
 
 
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index ab23c67b52bcd..f70199df40f52 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -160,28 +160,14 @@ def __init__(self, values, placement, ndim: int):
         Parameters
         ----------
         values : np.ndarray or ExtensionArray
+            We assume maybe_coerce_values has already been called.
         placement : BlockPlacement (or castable)
         ndim : int
             1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
         """
         self.ndim = ndim
         self.mgr_locs = placement
-        self.values = self._maybe_coerce_values(values)
-
-    @classmethod
-    def _maybe_coerce_values(cls, values):
-        """
-        Ensure we have correctly-typed values.
-
-        Parameters
-        ----------
-        values : np.ndarray or ExtensionArray
-
-        Returns
-        -------
-        np.ndarray or ExtensionArray
-        """
-        return values
+        self.values = values
 
     @property
     def _holder(self):
@@ -280,6 +266,8 @@ def make_block(self, values, placement=None) -> Block:
         if self.is_extension:
             values = ensure_block_shape(values, ndim=self.ndim)
 
+        # TODO: perf by not going through new_block, which coerces values again
+        # We assume maybe_coerce_values has already been called
         return new_block(values, placement=placement, ndim=self.ndim)
 
     @final
@@ -287,6 +275,8 @@ def make_block_same_class(self, values, placement=None) -> Block:
         """ Wrap given values in a block of same type as self. """
         if placement is None:
             placement = self.mgr_locs
+        # NB: constructs the block directly, without coercion via new_block
+        # We assume maybe_coerce_values has already been called
         return type(self)(values, placement=placement, ndim=self.ndim)
 
     @final
@@ -418,6 +408,7 @@ def _split_op_result(self, result) -> List[Block]:
             return nbs
 
         if not isinstance(result, Block):
+            result = maybe_coerce_values(result)
             result = self.make_block(result)
 
         return [result]
@@ -629,6 +620,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
             values, dtype, copy=copy, errors=errors  # type: ignore[type-var]
         )
 
+        new_values = maybe_coerce_values(new_values)
         newb = self.make_block(new_values)
         if newb.shape != self.shape:
             raise TypeError(
@@ -687,6 +679,7 @@ def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
             values = np.array(values, dtype="object")
 
         values[mask] = na_rep
+        values = values.astype(object, copy=False)
         return self.make_block(values)
 
     # block actions #
@@ -1540,24 +1533,6 @@ def putmask(self, mask, new) -> List[Block]:
         new_values[mask] = new
         return [self.make_block(values=new_values)]
 
-    @classmethod
-    def _maybe_coerce_values(cls, values):
-        """
-        Unbox to an extension array.
-
-        This will unbox an ExtensionArray stored in an Index or Series.
-        ExtensionArrays pass through. No dtype coercion is done.
-
-        Parameters
-        ----------
-        values : np.ndarray or ExtensionArray
-
-        Returns
-        -------
-        ExtensionArray
-        """
-        return extract_array(values)
-
     @property
     def _holder(self):
         # For extension blocks, the holder is values-dependent.
@@ -1891,6 +1866,7 @@ def to_native_types(
                 values = np.array(values, dtype="object")
 
             values[mask] = na_rep
+            values = values.astype(object, copy=False)
             return self.make_block(values)
 
         from pandas.io.formats.format import FloatArrayFormatter
@@ -1904,6 +1880,7 @@ def to_native_types(
             fixed_width=False,
         )
         res = formatter.get_result_as_array()
+        res = res.astype(object, copy=False)
         return self.make_block(res)
 
 
@@ -1957,6 +1934,7 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
 
         # TODO(EA2D): reshape not needed with 2D EAs
         res_values = res_values.reshape(self.values.shape)
+        res_values = maybe_coerce_values(res_values)
         nb = self.make_block_same_class(res_values)
         return [nb]
 
@@ -1984,12 +1962,14 @@ def diff(self, n: int, axis: int = 0) -> List[Block]:
         values = self.array_values().reshape(self.shape)
 
         new_values = values - values.shift(n, axis=axis)
+        new_values = maybe_coerce_values(new_values)
         return [self.make_block(new_values)]
 
     def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]:
         # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
         values = self.array_values().reshape(self.shape)
         new_values = values.shift(periods, fill_value=fill_value, axis=axis)
+        new_values = maybe_coerce_values(new_values)
         return [self.make_block_same_class(new_values)]
 
     def fillna(
@@ -2005,6 +1985,7 @@ def fillna(
         values = self.array_values()
         values = values if inplace else values.copy()
         new_values = values.fillna(value=value, limit=limit)
+        new_values = maybe_coerce_values(new_values)
         return [self.make_block_same_class(values=new_values)]
 
 
@@ -2014,30 +1995,6 @@ class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock):
     is_numeric = False
     _can_hold_na = True
 
-    @classmethod
-    def _maybe_coerce_values(cls, values):
-        """
-        Input validation for values passed to __init__. Ensure that
-        we have nanosecond datetime64/timedelta64, coercing if necessary.
-
-        Parameters
-        ----------
-        values : np.ndarray or ExtensionArray
-            Must be convertible to datetime64/timedelta64
-
-        Returns
-        -------
-        values : ndarray[datetime64ns/timedelta64ns]
-        """
-        values = extract_array(values, extract_numpy=True)
-        if isinstance(values, np.ndarray):
-            values = sanitize_to_nanoseconds(values)
-        elif isinstance(values.dtype, np.dtype):
-            # i.e. not datetime64tz
-            values = values._data
-
-        return values
-
     def array_values(self):
         return ensure_wrapped_if_datetimelike(self.values)
 
@@ -2054,6 +2011,7 @@ def to_native_types(self, na_rep="NaT", **kwargs):
         arr = self.array_values()
 
         result = arr._format_native_types(na_rep=na_rep, **kwargs)
+        result = result.astype(object, copy=False)
         return self.make_block(result)
 
 
@@ -2111,12 +2069,6 @@ class ObjectBlock(Block):
     is_object = True
     _can_hold_na = True
 
-    @classmethod
-    def _maybe_coerce_values(cls, values):
-        if issubclass(values.dtype.type, str):
-            values = np.array(values, dtype=object)
-        return values
-
     @property
     def is_bool(self):
         """
@@ -2242,6 +2194,38 @@ def replace(
 # Constructor Helpers
 
 
+def maybe_coerce_values(values) -> ArrayLike:
+    """
+    Input validation for values passed to a block's __init__. Ensure that
+    any datetime64/timedelta64 ndarrays are in nanoseconds, and that
+    string-dtype ndarrays are cast to object dtype.
+
+    Parameters
+    ----------
+    values : np.ndarray or ExtensionArray
+
+    Returns
+    -------
+    values : np.ndarray or ExtensionArray
+    """
+
+    # Note: the only test that needs extract_array here is one where we
+    # pass PandasDtype to Series.astype, then need to extract PandasArray here.
+    values = extract_array(values, extract_numpy=True)
+
+    if isinstance(values, np.ndarray):
+        values = sanitize_to_nanoseconds(values)
+
+        if issubclass(values.dtype.type, str):
+            values = np.array(values, dtype=object)
+
+    elif isinstance(values.dtype, np.dtype):
+        # i.e. not datetime64tz, extract DTA/TDA -> ndarray
+        values = values._data
+
+    return values
+
+
 def get_block_type(values, dtype: Optional[Dtype] = None):
     """
     Find the appropriate Block subclass to use for the given values and dtype.
@@ -2300,6 +2284,7 @@ def new_block(values, placement, *, ndim: int, klass=None) -> Block:
     if klass is None:
         klass = get_block_type(values, values.dtype)
 
+    values = maybe_coerce_values(values)
     return klass(values, ndim=ndim, placement=placement)
 
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 48bb6d9bf247b..e51ba08b8cf34 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -72,6 +72,7 @@
     ensure_block_shape,
     extend_blocks,
     get_block_type,
+    maybe_coerce_values,
     new_block,
 )
 from pandas.core.internals.ops import (
@@ -1057,6 +1058,7 @@ def iget(self, i: int) -> SingleBlockManager:
         values = block.iget(self.blklocs[i])
 
         # shortcut for select a single-dim from a 2-dim BM
+        values = maybe_coerce_values(values)
         nb = type(block)(values, placement=slice(0, len(values)), ndim=1)
         return SingleBlockManager(nb, self.axes[1])
 
@@ -1650,6 +1652,7 @@ def getitem_mgr(self, indexer) -> SingleBlockManager:
         if array.ndim > blk.values.ndim:
             # This will be caught by Series._get_values
             raise ValueError("dimension-expanding indexing not allowed")
+
         block = blk.make_block_same_class(array, placement=slice(0, len(array)))
         return type(self)(block, self.index[indexer])
 