From 8174f6016c70902957af9f84ef73fe9a098398ee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Mar 2020 16:11:48 -0700 Subject: [PATCH 1/3] CLN: de-duplicate should_store, get_values, array_values --- pandas/core/internals/blocks.py | 83 +++++++++++---------------------- 1 file changed, 28 insertions(+), 55 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 935ff09585b17..6ee857eaf10ff 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -653,6 +653,20 @@ def _can_hold_element(self, element: Any) -> bool: return issubclass(tipo.type, dtype) return isinstance(element, dtype) + def should_store(self, value: ArrayLike) -> bool: + """ + Should we set self.values[indexer] = value inplace or do we need to cast? + + Parameters + ---------- + value : np.ndarray or ExtensionArray + + Returns + ------- + bool + """ + return is_dtype_equal(value.dtype, self.dtype) + def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ values = self.values @@ -1752,10 +1766,7 @@ def setitem(self, indexer, value): def get_values(self, dtype=None): # ExtensionArrays must be iterable, so this works. 
- values = np.asarray(self.values) - if values.ndim == self.ndim - 1: - values = values.reshape((1,) + values.shape) - return values + return np.asarray(self.values).reshape(self.shape) def array_values(self) -> ExtensionArray: return self.values @@ -2021,11 +2032,6 @@ def to_native_types( ) return formatter.get_result_as_array() - def should_store(self, value: ArrayLike) -> bool: - # when inserting a column should not coerce integers to floats - # unnecessarily - return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype - class ComplexBlock(FloatOrComplexBlock): __slots__ = () @@ -2058,9 +2064,6 @@ def _can_hold_element(self, element: Any) -> bool: ) return is_integer(element) - def should_store(self, value: ArrayLike) -> bool: - return is_integer_dtype(value) and value.dtype == self.dtype - class DatetimeLikeBlockMixin: """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" @@ -2069,9 +2072,6 @@ class DatetimeLikeBlockMixin: def _holder(self): return DatetimeArray - def should_store(self, value): - return is_dtype_equal(self.dtype, value.dtype) - @property def fill_value(self): return np.datetime64("NaT", "ns") @@ -2081,15 +2081,16 @@ def get_values(self, dtype=None): return object dtype as boxed values, such as Timestamps/Timedelta """ if is_object_dtype(dtype): - values = self.values.ravel() - result = self._holder(values).astype(object) - return result.reshape(self.values.shape) + return self._holder(self.values).astype(object) return self.values def internal_values(self): # Override to return DatetimeArray and TimedeltaArray return self.array_values() + def array_values(self): + return self._holder(self.values) + def iget(self, key): # GH#31649 we need to wrap scalars in Timestamp/Timedelta # TODO(EA2D): this can be removed if we ever have 2D EA @@ -2216,12 +2217,6 @@ def set(self, locs, values): self.values[locs] = values - def external_values(self): - return np.asarray(self.values.astype("datetime64[ns]", copy=False)) 
- - def array_values(self) -> ExtensionArray: - return DatetimeArray._simple_new(self.values) - class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): """ implement a datetime64 block with a tz attribute """ @@ -2234,7 +2229,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): _can_hold_element = DatetimeBlock._can_hold_element to_native_types = DatetimeBlock.to_native_types fill_value = np.datetime64("NaT", "ns") - should_store = DatetimeBlock.should_store + should_store = Block.should_store @property def _holder(self): @@ -2293,14 +2288,13 @@ def get_values(self, dtype=None): if is_object_dtype(dtype): values = values.astype(object) - values = np.asarray(values) + # TODO(EA2D): reshape uuncessary with 2D EAs + return np.asarray(values).reshape(self.shape) - if self.ndim == 2: - # Ensure that our shape is correct for DataFrame. - # ExtensionArrays are always 1-D, even in a DataFrame when - # the analogous NumPy-backed column would be a 2-D ndarray. - values = values.reshape(1, -1) - return values + def external_values(self): + # NB: this is different from np.asarray(self.values), since that + # returns an object-dtype ndarray of Timestamps. 
+ return np.asarray(self.values.astype("datetime64[ns]", copy=False)) def _slice(self, slicer): """ return a slice of my values """ @@ -2467,12 +2461,6 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs): ) return rvalues - def external_values(self): - return np.asarray(self.values.astype("timedelta64[ns]", copy=False)) - - def array_values(self) -> ExtensionArray: - return TimedeltaArray._simple_new(self.values) - class BoolBlock(NumericBlock): __slots__ = () @@ -2485,11 +2473,6 @@ def _can_hold_element(self, element: Any) -> bool: return issubclass(tipo.type, np.bool_) return isinstance(element, (bool, np.bool_)) - def should_store(self, value: ArrayLike) -> bool: - return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype( - value - ) - def replace( self, to_replace, value, inplace=False, filter=None, regex=False, convert=True ): @@ -2577,15 +2560,6 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] def _can_hold_element(self, element: Any) -> bool: return True - def should_store(self, value: ArrayLike) -> bool: - return not ( - issubclass( - value.dtype.type, - (np.integer, np.floating, np.complexfloating, np.datetime64, np.bool_), - ) - or is_extension_array_dtype(value) - ) - def replace( self, to_replace, value, inplace=False, filter=None, regex=False, convert=True ): @@ -2816,6 +2790,8 @@ class CategoricalBlock(ExtensionBlock): _can_hold_na = True _concatenator = staticmethod(concat_categorical) + should_store = Block.should_store + def __init__(self, values, placement, ndim=None): # coerce to categorical if we can values = extract_array(values) @@ -2826,9 +2802,6 @@ def __init__(self, values, placement, ndim=None): def _holder(self): return Categorical - def should_store(self, arr: ArrayLike): - return isinstance(arr, self._holder) and is_dtype_equal(self.dtype, arr.dtype) - def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs): """ convert to our native 
types format, slicing if desired """ values = self.values From 022b517b74b5d1f7f70eace4f52e2bab3cd18575 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Mar 2020 16:13:03 -0700 Subject: [PATCH 2/3] fixup! [ENH] Add "fullmatch" matching mode to Series.str [#32806] (#32807) --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6ee857eaf10ff..67627a3e30df3 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2288,7 +2288,7 @@ def get_values(self, dtype=None): if is_object_dtype(dtype): values = values.astype(object) - # TODO(EA2D): reshape uuncessary with 2D EAs + # TODO(EA2D): reshape unnecessary with 2D EAs return np.asarray(values).reshape(self.shape) def external_values(self): From 92e8616ce98295dc6c5dc35073fad1f4e38b61ac Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 26 Mar 2020 15:04:23 -0700 Subject: [PATCH 3/3] restore comments, _simple_new --- pandas/core/internals/blocks.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d22ed7ddc11f7..71b05eff9b118 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2076,6 +2076,7 @@ def get_values(self, dtype=None): return object dtype as boxed values, such as Timestamps/Timedelta """ if is_object_dtype(dtype): + # DTA/TDA constructor and astype can handle 2D return self._holder(self.values).astype(object) return self.values @@ -2084,7 +2085,7 @@ def internal_values(self): return self.array_values() def array_values(self): - return self._holder(self.values) + return self._holder._simple_new(self.values) def iget(self, key): # GH#31649 we need to wrap scalars in Timestamp/Timedelta @@ -2225,6 +2226,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): to_native_types = DatetimeBlock.to_native_types fill_value = 
np.datetime64("NaT", "ns") should_store = Block.should_store + array_values = ExtensionBlock.array_values @property def _holder(self): @@ -2284,6 +2286,9 @@ def get_values(self, dtype=None): values = values.astype(object) # TODO(EA2D): reshape unnecessary with 2D EAs + # Ensure that our shape is correct for DataFrame. + # ExtensionArrays are always 1-D, even in a DataFrame when + # the analogous NumPy-backed column would be a 2-D ndarray. return np.asarray(values).reshape(self.shape) def external_values(self):