From 7ab85db556af6b3366bce63ddb101abd19355e0a Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 14 Feb 2021 15:35:06 -0800 Subject: [PATCH] REF: implement HybridBlock --- pandas/core/internals/blocks.py | 135 +++++++++++++------------------- 1 file changed, 55 insertions(+), 80 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8ba6018e743bb..a1886c5d949f3 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1931,38 +1931,59 @@ def to_native_types( return self.make_block(res) -class DatetimeLikeBlockMixin(HybridMixin, Block): - """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" - - @property - def _holder(self): - return DatetimeArray +class HybridBlock(HybridMixin, Block): + """ + Block backed by an NDArrayBackedExtensionArray + """ - @property - def fill_value(self): - return np.datetime64("NaT", "ns") + def internal_values(self): + # Override to return DatetimeArray and TimedeltaArray + return self.array_values() def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: """ return object dtype as boxed values, such as Timestamps/Timedelta """ + values = self.array_values() if is_object_dtype(dtype): # DTA/TDA constructor and astype can handle 2D - return self._holder(self.values).astype(object) - return self.values - - def internal_values(self): - # Override to return DatetimeArray and TimedeltaArray - return self.array_values() - - def array_values(self): - return self._holder._simple_new(self.values) + values = values.astype(object) + # TODO(EA2D): reshape not needed with 2D EAs + return np.asarray(values).reshape(self.shape) def iget(self, key): # GH#31649 we need to wrap scalars in Timestamp/Timedelta # TODO(EA2D): this can be removed if we ever have 2D EA return self.array_values().reshape(self.shape)[key] + def putmask(self, mask, new) -> List[Block]: + mask = extract_bool_array(mask) + + if not self._can_hold_element(new): + return self.astype(object).putmask(mask, new) + + # TODO(EA2D): reshape unnecessary with 2D EAs + arr = self.array_values().reshape(self.shape) + arr = cast("NDArrayBackedExtensionArray", arr) + arr.T.putmask(mask, new) + return [self] + + def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: + # TODO(EA2D): reshape unnecessary with 2D EAs + arr = self.array_values().reshape(self.shape) + + cond = extract_bool_array(cond) + + try: + res_values = arr.T.where(cond, other).T + except (ValueError, TypeError): + return super().where(other, cond, errors=errors, axis=axis) + + # TODO(EA2D): reshape not needed with 2D EAs + res_values = res_values.reshape(self.values.shape) + nb = self.make_block_same_class(res_values) + return [nb] + def diff(self, n: int, axis: int = 0) -> List[Block]: """ 1st discrete difference. @@ -1987,9 +2008,7 @@ def diff(self, n: int, axis: int = 0) -> List[Block]: values = self.array_values().reshape(self.shape) new_values = values - values.shift(n, axis=axis) - return [ - TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer, ndim=self.ndim) - ] + return [self.make_block(new_values)] def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]: # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs @@ -1997,40 +2016,27 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Blo new_values = values.shift(periods, fill_value=fill_value, axis=axis) return [self.make_block_same_class(new_values)] - def to_native_types(self, na_rep="NaT", **kwargs): - """ convert to our native types format """ - arr = self.array_values() - - result = arr._format_native_types(na_rep=na_rep, **kwargs) - return self.make_block(result) - def putmask(self, mask, new) -> List[Block]: - mask = extract_bool_array(mask) - - if not self._can_hold_element(new): - return self.astype(object).putmask(mask, new) +class DatetimeLikeBlockMixin(HybridBlock): + """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" - # TODO(EA2D): reshape unnecessary with 2D EAs - arr = self.array_values().reshape(self.shape) - arr = cast("NDArrayBackedExtensionArray", arr) - arr.T.putmask(mask, new) - return [self] + @property + def _holder(self): + return DatetimeArray - def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: - # TODO(EA2D): reshape unnecessary with 2D EAs - arr = self.array_values().reshape(self.shape) + @property + def fill_value(self): + return np.datetime64("NaT", "ns") - cond = extract_bool_array(cond) + def array_values(self): + return self._holder._simple_new(self.values) - try: - res_values = arr.T.where(cond, other).T - except (ValueError, TypeError): - return super().where(other, cond, errors=errors, axis=axis) + def to_native_types(self, na_rep="NaT", **kwargs): + """ convert to our native types format """ + arr = self.array_values() - # TODO(EA2D): reshape not needed with 2D EAs - res_values = res_values.reshape(self.values.shape) - nb = self.make_block_same_class(res_values) - return [nb] + result = arr._format_native_types(na_rep=na_rep, **kwargs) + return self.make_block(result) class DatetimeBlock(DatetimeLikeBlockMixin): @@ -2126,37 +2132,6 @@ def is_view(self) -> bool: # check the ndarray values of the DatetimeIndex values return self.values._data.base is not None - def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: - """ - Returns an ndarray of values. - - Parameters - ---------- - dtype : np.dtype - Only `object`-like dtypes are respected here (not sure - why). - - Returns - ------- - values : ndarray - When ``dtype=object``, then and object-dtype ndarray of - boxed values is returned. Otherwise, an M8[ns] ndarray - is returned. - - DatetimeArray is always 1-d. ``get_values`` will reshape - the return value to be the same dimensionality as the - block. - """ - values = self.values - if is_object_dtype(dtype): - values = values.astype(object) - - # TODO(EA2D): reshape unnecessary with 2D EAs - # Ensure that our shape is correct for DataFrame. - # ExtensionArrays are always 1-D, even in a DataFrame when - # the analogous NumPy-backed column would be a 2-D ndarray. - return np.asarray(values).reshape(self.shape) - def external_values(self): # NB: this is different from np.asarray(self.values), since that # return an object-dtype ndarray of Timestamps.