-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
CLN: de-duplicate Block.should_store and related #33028
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8174f60
022b517
147efe5
0fdfff2
cd58f3a
92e8616
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -654,6 +654,20 @@ def _can_hold_element(self, element: Any) -> bool: | |
return issubclass(tipo.type, dtype) | ||
return isinstance(element, dtype) | ||
|
||
def should_store(self, value: ArrayLike) -> bool: | ||
""" | ||
Should we set self.values[indexer] = value inplace or do we need to cast? | ||
|
||
Parameters | ||
---------- | ||
value : np.ndarray or ExtensionArray | ||
|
||
Returns | ||
------- | ||
bool | ||
""" | ||
return is_dtype_equal(value.dtype, self.dtype) | ||
|
||
def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): | ||
""" convert to our native types format, slicing if desired """ | ||
values = self.values | ||
|
@@ -1747,10 +1761,7 @@ def setitem(self, indexer, value): | |
|
||
def get_values(self, dtype=None): | ||
# ExtensionArrays must be iterable, so this works. | ||
values = np.asarray(self.values) | ||
if values.ndim == self.ndim - 1: | ||
values = values.reshape((1,) + values.shape) | ||
return values | ||
return np.asarray(self.values).reshape(self.shape) | ||
|
||
def array_values(self) -> ExtensionArray: | ||
return self.values | ||
|
@@ -2016,11 +2027,6 @@ def to_native_types( | |
) | ||
return formatter.get_result_as_array() | ||
|
||
def should_store(self, value: ArrayLike) -> bool: | ||
# when inserting a column should not coerce integers to floats | ||
# unnecessarily | ||
return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype | ||
|
||
|
||
class ComplexBlock(FloatOrComplexBlock): | ||
__slots__ = () | ||
|
@@ -2053,9 +2059,6 @@ def _can_hold_element(self, element: Any) -> bool: | |
) | ||
return is_integer(element) | ||
|
||
def should_store(self, value: ArrayLike) -> bool: | ||
return is_integer_dtype(value) and value.dtype == self.dtype | ||
|
||
|
||
class DatetimeLikeBlockMixin: | ||
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" | ||
|
@@ -2064,9 +2067,6 @@ class DatetimeLikeBlockMixin: | |
def _holder(self): | ||
return DatetimeArray | ||
|
||
def should_store(self, value): | ||
return is_dtype_equal(self.dtype, value.dtype) | ||
|
||
@property | ||
def fill_value(self): | ||
return np.datetime64("NaT", "ns") | ||
|
@@ -2076,15 +2076,17 @@ def get_values(self, dtype=None): | |
return object dtype as boxed values, such as Timestamps/Timedelta | ||
""" | ||
if is_object_dtype(dtype): | ||
values = self.values.ravel() | ||
result = self._holder(values).astype(object) | ||
return result.reshape(self.values.shape) | ||
# DTA/TDA constructor and astype can handle 2D | ||
return self._holder(self.values).astype(object) | ||
return self.values | ||
|
||
def internal_values(self): | ||
# Override to return DatetimeArray and TimedeltaArray | ||
return self.array_values() | ||
|
||
def array_values(self): | ||
return self._holder._simple_new(self.values) | ||
|
||
def iget(self, key): | ||
# GH#31649 we need to wrap scalars in Timestamp/Timedelta | ||
# TODO(EA2D): this can be removed if we ever have 2D EA | ||
|
@@ -2211,12 +2213,6 @@ def set(self, locs, values): | |
|
||
self.values[locs] = values | ||
|
||
def external_values(self): | ||
return np.asarray(self.values.astype("datetime64[ns]", copy=False)) | ||
|
||
def array_values(self) -> ExtensionArray: | ||
return DatetimeArray._simple_new(self.values) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is no the same as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True. My thought was that since the DTA constructor is relatively strict, the overhead wouldn't be too bad. Can revert if you have a strong opinion. (and/or we could cache_readonly array_values) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would need to look back or time it again to be sure about the exact impact, but as far as I remember, I did this change when optimizing |
||
|
||
|
||
class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): | ||
""" implement a datetime64 block with a tz attribute """ | ||
|
@@ -2229,7 +2225,8 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): | |
_can_hold_element = DatetimeBlock._can_hold_element | ||
to_native_types = DatetimeBlock.to_native_types | ||
fill_value = np.datetime64("NaT", "ns") | ||
should_store = DatetimeBlock.should_store | ||
should_store = Block.should_store | ||
array_values = ExtensionBlock.array_values | ||
|
||
@property | ||
def _holder(self): | ||
|
@@ -2288,14 +2285,16 @@ def get_values(self, dtype=None): | |
if is_object_dtype(dtype): | ||
values = values.astype(object) | ||
|
||
values = np.asarray(values) | ||
# TODO(EA2D): reshape unnecessary with 2D EAs | ||
# Ensure that our shape is correct for DataFrame. | ||
# ExtensionArrays are always 1-D, even in a DataFrame when | ||
# the analogous NumPy-backed column would be a 2-D ndarray. | ||
return np.asarray(values).reshape(self.shape) | ||
|
||
if self.ndim == 2: | ||
# Ensure that our shape is correct for DataFrame. | ||
# ExtensionArrays are always 1-D, even in a DataFrame when | ||
# the analogous NumPy-backed column would be a 2-D ndarray. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you keep this comment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do |
||
values = values.reshape(1, -1) | ||
return values | ||
def external_values(self): | ||
# NB: this is different from np.asarray(self.values), since that | ||
# return an object-dtype ndarray of Timestamps. | ||
return np.asarray(self.values.astype("datetime64[ns]", copy=False)) | ||
|
||
def _slice(self, slicer): | ||
""" return a slice of my values """ | ||
|
@@ -2462,12 +2461,6 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs): | |
) | ||
return rvalues | ||
|
||
def external_values(self): | ||
return np.asarray(self.values.astype("timedelta64[ns]", copy=False)) | ||
|
||
def array_values(self) -> ExtensionArray: | ||
return TimedeltaArray._simple_new(self.values) | ||
|
||
|
||
class BoolBlock(NumericBlock): | ||
__slots__ = () | ||
|
@@ -2480,11 +2473,6 @@ def _can_hold_element(self, element: Any) -> bool: | |
return issubclass(tipo.type, np.bool_) | ||
return isinstance(element, (bool, np.bool_)) | ||
|
||
def should_store(self, value: ArrayLike) -> bool: | ||
return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype( | ||
value | ||
) | ||
|
||
def replace( | ||
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True | ||
): | ||
|
@@ -2572,15 +2560,6 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] | |
def _can_hold_element(self, element: Any) -> bool: | ||
return True | ||
|
||
def should_store(self, value: ArrayLike) -> bool: | ||
return not ( | ||
issubclass( | ||
value.dtype.type, | ||
(np.integer, np.floating, np.complexfloating, np.datetime64, np.bool_), | ||
) | ||
or is_extension_array_dtype(value) | ||
) | ||
|
||
def replace( | ||
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True | ||
): | ||
|
@@ -2811,6 +2790,8 @@ class CategoricalBlock(ExtensionBlock): | |
_can_hold_na = True | ||
_concatenator = staticmethod(concat_categorical) | ||
|
||
should_store = Block.should_store | ||
|
||
def __init__(self, values, placement, ndim=None): | ||
# coerce to categorical if we can | ||
values = extract_array(values) | ||
|
@@ -2821,9 +2802,6 @@ def __init__(self, values, placement, ndim=None): | |
def _holder(self): | ||
return Categorical | ||
|
||
def should_store(self, arr: ArrayLike): | ||
return isinstance(arr, self._holder) and is_dtype_equal(self.dtype, arr.dtype) | ||
|
||
def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs): | ||
""" convert to our native types format, slicing if desired """ | ||
values = self.values | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a comment here on why
_holder
can handle 2D?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will do