Skip to content

Commit c47e9ca

Browse files
authored
CLN: de-duplicate Block.should_store and related (#33028)
1 parent e872067 commit c47e9ca

File tree

1 file changed

+33
-55
lines changed

1 file changed

+33
-55
lines changed

pandas/core/internals/blocks.py

+33-55
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,20 @@ def _can_hold_element(self, element: Any) -> bool:
654654
return issubclass(tipo.type, dtype)
655655
return isinstance(element, dtype)
656656

657+
def should_store(self, value: ArrayLike) -> bool:
658+
"""
659+
Should we set self.values[indexer] = value inplace or do we need to cast?
660+
661+
Parameters
662+
----------
663+
value : np.ndarray or ExtensionArray
664+
665+
Returns
666+
-------
667+
bool
668+
"""
669+
return is_dtype_equal(value.dtype, self.dtype)
670+
657671
def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
658672
""" convert to our native types format, slicing if desired """
659673
values = self.values
@@ -1747,10 +1761,7 @@ def setitem(self, indexer, value):
17471761

17481762
def get_values(self, dtype=None):
17491763
# ExtensionArrays must be iterable, so this works.
1750-
values = np.asarray(self.values)
1751-
if values.ndim == self.ndim - 1:
1752-
values = values.reshape((1,) + values.shape)
1753-
return values
1764+
return np.asarray(self.values).reshape(self.shape)
17541765

17551766
def array_values(self) -> ExtensionArray:
17561767
return self.values
@@ -2016,11 +2027,6 @@ def to_native_types(
20162027
)
20172028
return formatter.get_result_as_array()
20182029

2019-
def should_store(self, value: ArrayLike) -> bool:
2020-
# when inserting a column should not coerce integers to floats
2021-
# unnecessarily
2022-
return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype
2023-
20242030

20252031
class ComplexBlock(FloatOrComplexBlock):
20262032
__slots__ = ()
@@ -2053,9 +2059,6 @@ def _can_hold_element(self, element: Any) -> bool:
20532059
)
20542060
return is_integer(element)
20552061

2056-
def should_store(self, value: ArrayLike) -> bool:
2057-
return is_integer_dtype(value) and value.dtype == self.dtype
2058-
20592062

20602063
class DatetimeLikeBlockMixin:
20612064
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""
@@ -2064,9 +2067,6 @@ class DatetimeLikeBlockMixin:
20642067
def _holder(self):
20652068
return DatetimeArray
20662069

2067-
def should_store(self, value):
2068-
return is_dtype_equal(self.dtype, value.dtype)
2069-
20702070
@property
20712071
def fill_value(self):
20722072
return np.datetime64("NaT", "ns")
@@ -2076,15 +2076,17 @@ def get_values(self, dtype=None):
20762076
return object dtype as boxed values, such as Timestamps/Timedelta
20772077
"""
20782078
if is_object_dtype(dtype):
2079-
values = self.values.ravel()
2080-
result = self._holder(values).astype(object)
2081-
return result.reshape(self.values.shape)
2079+
# DTA/TDA constructor and astype can handle 2D
2080+
return self._holder(self.values).astype(object)
20822081
return self.values
20832082

20842083
def internal_values(self):
20852084
# Override to return DatetimeArray and TimedeltaArray
20862085
return self.array_values()
20872086

2087+
def array_values(self):
2088+
return self._holder._simple_new(self.values)
2089+
20882090
def iget(self, key):
20892091
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
20902092
# TODO(EA2D): this can be removed if we ever have 2D EA
@@ -2211,12 +2213,6 @@ def set(self, locs, values):
22112213

22122214
self.values[locs] = values
22132215

2214-
def external_values(self):
2215-
return np.asarray(self.values.astype("datetime64[ns]", copy=False))
2216-
2217-
def array_values(self) -> ExtensionArray:
2218-
return DatetimeArray._simple_new(self.values)
2219-
22202216

22212217
class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
22222218
""" implement a datetime64 block with a tz attribute """
@@ -2229,7 +2225,8 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
22292225
_can_hold_element = DatetimeBlock._can_hold_element
22302226
to_native_types = DatetimeBlock.to_native_types
22312227
fill_value = np.datetime64("NaT", "ns")
2232-
should_store = DatetimeBlock.should_store
2228+
should_store = Block.should_store
2229+
array_values = ExtensionBlock.array_values
22332230

22342231
@property
22352232
def _holder(self):
@@ -2288,14 +2285,16 @@ def get_values(self, dtype=None):
22882285
if is_object_dtype(dtype):
22892286
values = values.astype(object)
22902287

2291-
values = np.asarray(values)
2288+
# TODO(EA2D): reshape unnecessary with 2D EAs
2289+
# Ensure that our shape is correct for DataFrame.
2290+
# ExtensionArrays are always 1-D, even in a DataFrame when
2291+
# the analogous NumPy-backed column would be a 2-D ndarray.
2292+
return np.asarray(values).reshape(self.shape)
22922293

2293-
if self.ndim == 2:
2294-
# Ensure that our shape is correct for DataFrame.
2295-
# ExtensionArrays are always 1-D, even in a DataFrame when
2296-
# the analogous NumPy-backed column would be a 2-D ndarray.
2297-
values = values.reshape(1, -1)
2298-
return values
2294+
def external_values(self):
2295+
# NB: this is different from np.asarray(self.values), since that
2296+
# returns an object-dtype ndarray of Timestamps.
2297+
return np.asarray(self.values.astype("datetime64[ns]", copy=False))
22992298

23002299
def _slice(self, slicer):
23012300
""" return a slice of my values """
@@ -2462,12 +2461,6 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
24622461
)
24632462
return rvalues
24642463

2465-
def external_values(self):
2466-
return np.asarray(self.values.astype("timedelta64[ns]", copy=False))
2467-
2468-
def array_values(self) -> ExtensionArray:
2469-
return TimedeltaArray._simple_new(self.values)
2470-
24712464

24722465
class BoolBlock(NumericBlock):
24732466
__slots__ = ()
@@ -2480,11 +2473,6 @@ def _can_hold_element(self, element: Any) -> bool:
24802473
return issubclass(tipo.type, np.bool_)
24812474
return isinstance(element, (bool, np.bool_))
24822475

2483-
def should_store(self, value: ArrayLike) -> bool:
2484-
return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype(
2485-
value
2486-
)
2487-
24882476
def replace(
24892477
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
24902478
):
@@ -2572,15 +2560,6 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
25722560
def _can_hold_element(self, element: Any) -> bool:
25732561
return True
25742562

2575-
def should_store(self, value: ArrayLike) -> bool:
2576-
return not (
2577-
issubclass(
2578-
value.dtype.type,
2579-
(np.integer, np.floating, np.complexfloating, np.datetime64, np.bool_),
2580-
)
2581-
or is_extension_array_dtype(value)
2582-
)
2583-
25842563
def replace(
25852564
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
25862565
):
@@ -2811,6 +2790,8 @@ class CategoricalBlock(ExtensionBlock):
28112790
_can_hold_na = True
28122791
_concatenator = staticmethod(concat_categorical)
28132792

2793+
should_store = Block.should_store
2794+
28142795
def __init__(self, values, placement, ndim=None):
28152796
# coerce to categorical if we can
28162797
values = extract_array(values)
@@ -2821,9 +2802,6 @@ def __init__(self, values, placement, ndim=None):
28212802
def _holder(self):
28222803
return Categorical
28232804

2824-
def should_store(self, arr: ArrayLike):
2825-
return isinstance(arr, self._holder) and is_dtype_equal(self.dtype, arr.dtype)
2826-
28272805
def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs):
28282806
""" convert to our native types format, slicing if desired """
28292807
values = self.values

0 commit comments

Comments
 (0)