Skip to content

Commit 152f868

Browse files
authored
REF: implement HybridBlock (#39814)
1 parent fb81567 commit 152f868

File tree

1 file changed

+80
-105
lines changed

1 file changed

+80
-105
lines changed

pandas/core/internals/blocks.py

+80-105
Original file line numberDiff line numberDiff line change
@@ -1956,64 +1956,59 @@ def to_native_types(
19561956
return self.make_block(res)
19571957

19581958

1959-
class DatetimeLikeBlockMixin(HybridMixin, Block):
1960-
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""
1961-
1962-
_dtype: np.dtype
1963-
_holder: Type[Union[DatetimeArray, TimedeltaArray]]
1964-
1965-
@classmethod
1966-
def _maybe_coerce_values(cls, values):
1967-
"""
1968-
Input validation for values passed to __init__. Ensure that
1969-
we have nanosecond datetime64/timedelta64, coercing if necessary.
1970-
1971-
Parameters
1972-
----------
1973-
values : array-like
1974-
Must be convertible to datetime64/timedelta64
1975-
1976-
Returns
1977-
-------
1978-
values : ndarray[datetime64ns/timedelta64ns]
1979-
1980-
Overridden by DatetimeTZBlock.
1981-
"""
1982-
if values.dtype != cls._dtype:
1983-
# non-nano we will convert to nano
1984-
if values.dtype.kind != cls._dtype.kind:
1985-
# caller is responsible for ensuring td64/dt64 dtype
1986-
raise TypeError(values.dtype) # pragma: no cover
1987-
1988-
values = cls._holder._from_sequence(values)._data
1989-
1990-
if isinstance(values, cls._holder):
1991-
values = values._data
1959+
class NDArrayBackedExtensionBlock(HybridMixin, Block):
1960+
"""
1961+
Block backed by an NDArrayBackedExtensionArray
1962+
"""
19921963

1993-
assert isinstance(values, np.ndarray), type(values)
1994-
return values
1964+
def internal_values(self):
1965+
# Override to return DatetimeArray and TimedeltaArray
1966+
return self.array_values()
19951967

19961968
def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
19971969
"""
19981970
return object dtype as boxed values, such as Timestamps/Timedelta
19991971
"""
1972+
values = self.array_values()
20001973
if is_object_dtype(dtype):
20011974
# DTA/TDA constructor and astype can handle 2D
2002-
return self._holder(self.values).astype(object)
2003-
return self.values
2004-
2005-
def internal_values(self):
2006-
# Override to return DatetimeArray and TimedeltaArray
2007-
return self.array_values()
2008-
2009-
def array_values(self):
2010-
return self._holder._simple_new(self.values)
1975+
values = values.astype(object)
1976+
# TODO(EA2D): reshape not needed with 2D EAs
1977+
return np.asarray(values).reshape(self.shape)
20111978

20121979
def iget(self, key):
20131980
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
20141981
# TODO(EA2D): this can be removed if we ever have 2D EA
20151982
return self.array_values().reshape(self.shape)[key]
20161983

1984+
def putmask(self, mask, new) -> List[Block]:
1985+
mask = extract_bool_array(mask)
1986+
1987+
if not self._can_hold_element(new):
1988+
return self.astype(object).putmask(mask, new)
1989+
1990+
# TODO(EA2D): reshape unnecessary with 2D EAs
1991+
arr = self.array_values().reshape(self.shape)
1992+
arr = cast("NDArrayBackedExtensionArray", arr)
1993+
arr.T.putmask(mask, new)
1994+
return [self]
1995+
1996+
def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
1997+
# TODO(EA2D): reshape unnecessary with 2D EAs
1998+
arr = self.array_values().reshape(self.shape)
1999+
2000+
cond = extract_bool_array(cond)
2001+
2002+
try:
2003+
res_values = arr.T.where(cond, other).T
2004+
except (ValueError, TypeError):
2005+
return super().where(other, cond, errors=errors, axis=axis)
2006+
2007+
# TODO(EA2D): reshape not needed with 2D EAs
2008+
res_values = res_values.reshape(self.values.shape)
2009+
nb = self.make_block_same_class(res_values)
2010+
return [nb]
2011+
20172012
def diff(self, n: int, axis: int = 0) -> List[Block]:
20182013
"""
20192014
1st discrete difference.
@@ -2038,50 +2033,61 @@ def diff(self, n: int, axis: int = 0) -> List[Block]:
20382033
values = self.array_values().reshape(self.shape)
20392034

20402035
new_values = values - values.shift(n, axis=axis)
2041-
return [
2042-
TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer, ndim=self.ndim)
2043-
]
2036+
return [self.make_block(new_values)]
20442037

20452038
def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]:
20462039
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
2047-
values = self.array_values()
2040+
values = self.array_values().reshape(self.shape)
20482041
new_values = values.shift(periods, fill_value=fill_value, axis=axis)
20492042
return [self.make_block_same_class(new_values)]
20502043

2051-
def to_native_types(self, na_rep="NaT", **kwargs):
2052-
""" convert to our native types format """
2053-
arr = self.array_values()
20542044

2055-
result = arr._format_native_types(na_rep=na_rep, **kwargs)
2056-
return self.make_block(result)
2045+
class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock):
2046+
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""
20572047

2058-
def putmask(self, mask, new) -> List[Block]:
2059-
mask = extract_bool_array(mask)
2048+
_dtype: np.dtype
2049+
_holder: Type[Union[DatetimeArray, TimedeltaArray]]
20602050

2061-
if not self._can_hold_element(new):
2062-
return self.astype(object).putmask(mask, new)
2051+
@classmethod
2052+
def _maybe_coerce_values(cls, values):
2053+
"""
2054+
Input validation for values passed to __init__. Ensure that
2055+
we have nanosecond datetime64/timedelta64, coercing if necessary.
20632056
2064-
# TODO(EA2D): reshape unnecessary with 2D EAs
2065-
arr = self.array_values().reshape(self.shape)
2066-
arr = cast("NDArrayBackedExtensionArray", arr)
2067-
arr.T.putmask(mask, new)
2068-
return [self]
2057+
Parameters
2058+
----------
2059+
values : array-like
2060+
Must be convertible to datetime64/timedelta64
20692061
2070-
def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
2071-
# TODO(EA2D): reshape unnecessary with 2D EAs
2072-
arr = self.array_values().reshape(self.shape)
2062+
Returns
2063+
-------
2064+
values : ndarray[datetime64ns/timedelta64ns]
20732065
2074-
cond = extract_bool_array(cond)
2066+
Overridden by DatetimeTZBlock.
2067+
"""
2068+
if values.dtype != cls._dtype:
2069+
# non-nano we will convert to nano
2070+
if values.dtype.kind != cls._dtype.kind:
2071+
# caller is responsible for ensuring td64/dt64 dtype
2072+
raise TypeError(values.dtype) # pragma: no cover
20752073

2076-
try:
2077-
res_values = arr.T.where(cond, other).T
2078-
except (ValueError, TypeError):
2079-
return super().where(other, cond, errors=errors, axis=axis)
2074+
values = cls._holder._from_sequence(values)._data
20802075

2081-
# TODO(EA2D): reshape not needed with 2D EAs
2082-
res_values = res_values.reshape(self.values.shape)
2083-
nb = self.make_block_same_class(res_values)
2084-
return [nb]
2076+
if isinstance(values, cls._holder):
2077+
values = values._data
2078+
2079+
assert isinstance(values, np.ndarray), type(values)
2080+
return values
2081+
2082+
def array_values(self):
2083+
return self._holder._simple_new(self.values)
2084+
2085+
def to_native_types(self, na_rep="NaT", **kwargs):
2086+
""" convert to our native types format """
2087+
arr = self.array_values()
2088+
2089+
result = arr._format_native_types(na_rep=na_rep, **kwargs)
2090+
return self.make_block(result)
20852091

20862092

20872093
class DatetimeBlock(DatetimeLikeBlockMixin):
@@ -2153,37 +2159,6 @@ def is_view(self) -> bool:
21532159
# check the ndarray values of the DatetimeIndex values
21542160
return self.values._data.base is not None
21552161

2156-
def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
2157-
"""
2158-
Returns an ndarray of values.
2159-
2160-
Parameters
2161-
----------
2162-
dtype : np.dtype
2163-
Only `object`-like dtypes are respected here (not sure
2164-
why).
2165-
2166-
Returns
2167-
-------
2168-
values : ndarray
2169-
When ``dtype=object``, then an object-dtype ndarray of
2170-
boxed values is returned. Otherwise, an M8[ns] ndarray
2171-
is returned.
2172-
2173-
DatetimeArray is always 1-d. ``get_values`` will reshape
2174-
the return value to be the same dimensionality as the
2175-
block.
2176-
"""
2177-
values = self.values
2178-
if is_object_dtype(dtype):
2179-
values = values.astype(object)
2180-
2181-
# TODO(EA2D): reshape unnecessary with 2D EAs
2182-
# Ensure that our shape is correct for DataFrame.
2183-
# ExtensionArrays are always 1-D, even in a DataFrame when
2184-
# the analogous NumPy-backed column would be a 2-D ndarray.
2185-
return np.asarray(values).reshape(self.shape)
2186-
21872162
def external_values(self):
21882163
# NB: this is different from np.asarray(self.values), since that
21892164
# returns an object-dtype ndarray of Timestamps.

0 commit comments

Comments
 (0)