diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0c98a779424bd..ea9c697345ba9 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -55,12 +55,7 @@ ABCPandasArray, ABCSeries, ) -from pandas.core.dtypes.missing import ( - _isna_compat, - array_equivalent, - is_valid_nat_for_dtype, - isna, -) +from pandas.core.dtypes.missing import _isna_compat, is_valid_nat_for_dtype, isna import pandas.core.algorithms as algos from pandas.core.array_algos.transforms import shift @@ -1367,11 +1362,6 @@ def where_func(cond, values, other): return result_blocks - def equals(self, other) -> bool: - if self.dtype != other.dtype or self.shape != other.shape: - return False - return array_equivalent(self.values, other.values) - def _unstack(self, unstacker, fill_value, new_placement): """ Return a list of unstacked blocks of self @@ -1865,9 +1855,6 @@ def where( return [self.make_block_same_class(result, placement=self.mgr_locs)] - def equals(self, other) -> bool: - return self.values.equals(other.values) - def _unstack(self, unstacker, fill_value, new_placement): # ExtensionArray-safe unstack. # We override ObjectBlock._unstack, which unstacks directly on the @@ -1913,12 +1900,6 @@ class NumericBlock(Block): class FloatOrComplexBlock(NumericBlock): __slots__ = () - def equals(self, other) -> bool: - if self.dtype != other.dtype or self.shape != other.shape: - return False - left, right = self.values, other.values - return ((left == right) | (np.isnan(left) & np.isnan(right))).all() - class FloatBlock(FloatOrComplexBlock): __slots__ = () @@ -2282,12 +2263,6 @@ def setitem(self, indexer, value): ) return newb.setitem(indexer, value) - def equals(self, other) -> bool: - # override for significant performance improvement - if self.dtype != other.dtype or self.shape != other.shape: - return False - return (self.values.view("i8") == other.values.view("i8")).all() - def quantile(self, qs, interpolation="linear", axis=0): naive = self.values.view("M8[ns]") diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6055a6205d286..51865ad1c3e1a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -19,6 +19,7 @@ from pandas.core.dtypes.common import ( DT64NS_DTYPE, is_datetimelike_v_numeric, + is_dtype_equal, is_extension_array_dtype, is_list_like, is_numeric_v_string_like, @@ -27,9 +28,10 @@ from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.missing import array_equivalent, isna import pandas.core.algorithms as algos +from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseDtype from pandas.core.base import PandasObject import pandas.core.common as com @@ -1409,29 +1411,39 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True ) - def equals(self, other) -> bool: + def equals(self, other: "BlockManager") -> bool: self_axes, other_axes = self.axes, other.axes if len(self_axes) != len(other_axes): return False if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): return False - self._consolidate_inplace() - other._consolidate_inplace() - if len(self.blocks) != len(other.blocks): - return False - # canonicalize block order, using a tuple combining the mgr_locs - # then type name because there might be unconsolidated - # blocks (say, Categorical) which can only be distinguished by - # the iteration order - def canonicalize(block): - return (block.mgr_locs.as_array.tolist(), block.dtype.name) - - self_blocks = sorted(self.blocks, key=canonicalize) - other_blocks = sorted(other.blocks, key=canonicalize) - return all( - block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks) - ) + if self.ndim == 1: + # For SingleBlockManager (i.e.Series) + if other.ndim != 1: + return False + left = self.blocks[0].values + right = other.blocks[0].values + if not is_dtype_equal(left.dtype, right.dtype): + return False + elif isinstance(left, ExtensionArray): + return left.equals(right) + else: + return array_equivalent(left, right) + + for i in range(len(self.items)): + # Check column-wise, return False if any column doesnt match + left = self.iget_values(i) + right = other.iget_values(i) + if not is_dtype_equal(left.dtype, right.dtype): + return False + elif isinstance(left, ExtensionArray): + if not left.equals(right): + return False + else: + if not array_equivalent(left, right): + return False + return True def unstack(self, unstacker, fill_value) -> "BlockManager": """ diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 5fd44d7cd74a9..06ccdd2484a2a 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -377,7 +377,7 @@ def test_copy(self, mgr): for blk, cp_blk in zip(mgr.blocks, cp.blocks): # view assertion - assert cp_blk.equals(blk) + tm.assert_equal(cp_blk.values, blk.values) if isinstance(blk.values, np.ndarray): assert cp_blk.values.base is blk.values.base else: @@ -389,7 +389,7 @@ def test_copy(self, mgr): # copy assertion we either have a None for a base or in case of # some blocks it is an array (e.g. datetimetz), but was copied - assert cp_blk.equals(blk) + tm.assert_equal(cp_blk.values, blk.values) if not isinstance(cp_blk.values, np.ndarray): assert cp_blk.values._data.base is not blk.values._data.base else: