Skip to content

REF: implement HybridBlock #39814

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 16, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 80 additions & 105 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1935,64 +1935,59 @@ def to_native_types(
return self.make_block(res)


class DatetimeLikeBlockMixin(HybridMixin, Block):
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

_dtype: np.dtype
_holder: Type[Union[DatetimeArray, TimedeltaArray]]

@classmethod
def _maybe_coerce_values(cls, values):
"""
Input validation for values passed to __init__. Ensure that
we have nanosecond datetime64/timedelta64, coercing if necessary.

Parameters
----------
values : array-like
Must be convertible to datetime64/timedelta64

Returns
-------
values : ndarray[datetime64ns/timedelta64ns]

Overridden by DatetimeTZBlock.
"""
if values.dtype != cls._dtype:
# non-nano we will convert to nano
if values.dtype.kind != cls._dtype.kind:
# caller is responsible for ensuring td64/dt64 dtype
raise TypeError(values.dtype) # pragma: no cover

values = cls._holder._from_sequence(values)._data

if isinstance(values, cls._holder):
values = values._data
class NDArrayBackedExtensionBlock(HybridMixin, Block):
"""
Block backed by an NDArrayBackedExtensionArray
"""

assert isinstance(values, np.ndarray), type(values)
return values
def internal_values(self):
# Override to return DatetimeArray and TimedeltaArray
return self.array_values()

def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
"""
return object dtype as boxed values, such as Timestamps/Timedelta
"""
values = self.array_values()
if is_object_dtype(dtype):
# DTA/TDA constructor and astype can handle 2D
return self._holder(self.values).astype(object)
return self.values

def internal_values(self):
# Override to return DatetimeArray and TimedeltaArray
return self.array_values()

def array_values(self):
return self._holder._simple_new(self.values)
values = values.astype(object)
# TODO(EA2D): reshape not needed with 2D EAs
return np.asarray(values).reshape(self.shape)

def iget(self, key):
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
# TODO(EA2D): this can be removed if we ever have 2D EA
return self.array_values().reshape(self.shape)[key]

def putmask(self, mask, new) -> List[Block]:
mask = extract_bool_array(mask)

if not self._can_hold_element(new):
return self.astype(object).putmask(mask, new)

# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.array_values().reshape(self.shape)
arr = cast("NDArrayBackedExtensionArray", arr)
arr.T.putmask(mask, new)
return [self]

def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.array_values().reshape(self.shape)

cond = extract_bool_array(cond)

try:
res_values = arr.T.where(cond, other).T
except (ValueError, TypeError):
return super().where(other, cond, errors=errors, axis=axis)

# TODO(EA2D): reshape not needed with 2D EAs
res_values = res_values.reshape(self.values.shape)
nb = self.make_block_same_class(res_values)
return [nb]

def diff(self, n: int, axis: int = 0) -> List[Block]:
"""
1st discrete difference.
Expand All @@ -2017,50 +2012,61 @@ def diff(self, n: int, axis: int = 0) -> List[Block]:
values = self.array_values().reshape(self.shape)

new_values = values - values.shift(n, axis=axis)
return [
TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer, ndim=self.ndim)
]
return [self.make_block(new_values)]

def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]:
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
values = self.array_values()
values = self.array_values().reshape(self.shape)
new_values = values.shift(periods, fill_value=fill_value, axis=axis)
return [self.make_block_same_class(new_values)]

def to_native_types(self, na_rep="NaT", **kwargs):
""" convert to our native types format """
arr = self.array_values()

result = arr._format_native_types(na_rep=na_rep, **kwargs)
return self.make_block(result)
class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock):
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

def putmask(self, mask, new) -> List[Block]:
mask = extract_bool_array(mask)
_dtype: np.dtype
_holder: Type[Union[DatetimeArray, TimedeltaArray]]

if not self._can_hold_element(new):
return self.astype(object).putmask(mask, new)
@classmethod
def _maybe_coerce_values(cls, values):
"""
Input validation for values passed to __init__. Ensure that
we have nanosecond datetime64/timedelta64, coercing if necessary.

# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.array_values().reshape(self.shape)
arr = cast("NDArrayBackedExtensionArray", arr)
arr.T.putmask(mask, new)
return [self]
Parameters
----------
values : array-like
Must be convertible to datetime64/timedelta64

def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.array_values().reshape(self.shape)
Returns
-------
values : ndarray[datetime64ns/timedelta64ns]

cond = extract_bool_array(cond)
Overridden by DatetimeTZBlock.
"""
if values.dtype != cls._dtype:
# non-nano we will convert to nano
if values.dtype.kind != cls._dtype.kind:
# caller is responsible for ensuring td64/dt64 dtype
raise TypeError(values.dtype) # pragma: no cover

try:
res_values = arr.T.where(cond, other).T
except (ValueError, TypeError):
return super().where(other, cond, errors=errors, axis=axis)
values = cls._holder._from_sequence(values)._data

# TODO(EA2D): reshape not needed with 2D EAs
res_values = res_values.reshape(self.values.shape)
nb = self.make_block_same_class(res_values)
return [nb]
if isinstance(values, cls._holder):
values = values._data

assert isinstance(values, np.ndarray), type(values)
return values

def array_values(self):
return self._holder._simple_new(self.values)

def to_native_types(self, na_rep="NaT", **kwargs):
""" convert to our native types format """
arr = self.array_values()

result = arr._format_native_types(na_rep=na_rep, **kwargs)
return self.make_block(result)


class DatetimeBlock(DatetimeLikeBlockMixin):
Expand Down Expand Up @@ -2133,37 +2139,6 @@ def is_view(self) -> bool:
# check the ndarray values of the DatetimeIndex values
return self.values._data.base is not None

def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
"""
Returns an ndarray of values.

Parameters
----------
dtype : np.dtype
Only `object`-like dtypes are respected here (not sure
why).

Returns
-------
values : ndarray
When ``dtype=object``, then an object-dtype ndarray of
boxed values is returned. Otherwise, an M8[ns] ndarray
is returned.

DatetimeArray is always 1-d. ``get_values`` will reshape
the return value to be the same dimensionality as the
block.
"""
values = self.values
if is_object_dtype(dtype):
values = values.astype(object)

# TODO(EA2D): reshape unnecessary with 2D EAs
# Ensure that our shape is correct for DataFrame.
# ExtensionArrays are always 1-D, even in a DataFrame when
# the analogous NumPy-backed column would be a 2-D ndarray.
return np.asarray(values).reshape(self.shape)

def external_values(self):
# NB: this is different from np.asarray(self.values), since that
# returns an object-dtype ndarray of Timestamps.
Expand Down