Skip to content

Commit 55be9e9

Browse files
committed
REF: dont consolidate in BlockManager.equals (pandas-dev#34962)
* REF: dont consolidate in BlockManager.equals * doctest fixup * Remove Block.equals * simplify, comments
1 parent 7a9d5cc commit 55be9e9

File tree

3 files changed

+33
-46
lines changed

3 files changed

+33
-46
lines changed

pandas/core/internals/blocks.py

+1-26
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,7 @@
5656
ABCPandasArray,
5757
ABCSeries,
5858
)
59-
from pandas.core.dtypes.missing import (
60-
_isna_compat,
61-
array_equivalent,
62-
is_valid_nat_for_dtype,
63-
isna,
64-
)
59+
from pandas.core.dtypes.missing import _isna_compat, is_valid_nat_for_dtype, isna
6560

6661
import pandas.core.algorithms as algos
6762
from pandas.core.array_algos.transforms import shift
@@ -1383,11 +1378,6 @@ def where_func(cond, values, other):
13831378

13841379
return result_blocks
13851380

1386-
def equals(self, other) -> bool:
1387-
if self.dtype != other.dtype or self.shape != other.shape:
1388-
return False
1389-
return array_equivalent(self.values, other.values)
1390-
13911381
def _unstack(self, unstacker, fill_value, new_placement):
13921382
"""
13931383
Return a list of unstacked blocks of self
@@ -1881,9 +1871,6 @@ def where(
18811871

18821872
return [self.make_block_same_class(result, placement=self.mgr_locs)]
18831873

1884-
def equals(self, other) -> bool:
1885-
return self.values.equals(other.values)
1886-
18871874
def _unstack(self, unstacker, fill_value, new_placement):
18881875
# ExtensionArray-safe unstack.
18891876
# We override ObjectBlock._unstack, which unstacks directly on the
@@ -1929,12 +1916,6 @@ class NumericBlock(Block):
19291916
class FloatOrComplexBlock(NumericBlock):
19301917
__slots__ = ()
19311918

1932-
def equals(self, other) -> bool:
1933-
if self.dtype != other.dtype or self.shape != other.shape:
1934-
return False
1935-
left, right = self.values, other.values
1936-
return ((left == right) | (np.isnan(left) & np.isnan(right))).all()
1937-
19381919

19391920
class FloatBlock(FloatOrComplexBlock):
19401921
__slots__ = ()
@@ -2298,12 +2279,6 @@ def setitem(self, indexer, value):
22982279
)
22992280
return newb.setitem(indexer, value)
23002281

2301-
def equals(self, other) -> bool:
2302-
# override for significant performance improvement
2303-
if self.dtype != other.dtype or self.shape != other.shape:
2304-
return False
2305-
return (self.values.view("i8") == other.values.view("i8")).all()
2306-
23072282
def quantile(self, qs, interpolation="linear", axis=0):
23082283
naive = self.values.view("M8[ns]")
23092284

pandas/core/internals/managers.py

+30-18
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from pandas.core.dtypes.common import (
2020
DT64NS_DTYPE,
2121
is_datetimelike_v_numeric,
22+
is_dtype_equal,
2223
is_extension_array_dtype,
2324
is_list_like,
2425
is_numeric_v_string_like,
@@ -27,9 +28,10 @@
2728
from pandas.core.dtypes.concat import concat_compat
2829
from pandas.core.dtypes.dtypes import ExtensionDtype
2930
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
30-
from pandas.core.dtypes.missing import isna
31+
from pandas.core.dtypes.missing import array_equivalent, isna
3132

3233
import pandas.core.algorithms as algos
34+
from pandas.core.arrays import ExtensionArray
3335
from pandas.core.arrays.sparse import SparseDtype
3436
from pandas.core.base import PandasObject
3537
import pandas.core.common as com
@@ -1409,29 +1411,39 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True
14091411
new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
14101412
)
14111413

1412-
def equals(self, other) -> bool:
1414+
def equals(self, other: "BlockManager") -> bool:
14131415
self_axes, other_axes = self.axes, other.axes
14141416
if len(self_axes) != len(other_axes):
14151417
return False
14161418
if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
14171419
return False
1418-
self._consolidate_inplace()
1419-
other._consolidate_inplace()
1420-
if len(self.blocks) != len(other.blocks):
1421-
return False
14221420

1423-
# canonicalize block order, using a tuple combining the mgr_locs
1424-
# then type name because there might be unconsolidated
1425-
# blocks (say, Categorical) which can only be distinguished by
1426-
# the iteration order
1427-
def canonicalize(block):
1428-
return (block.mgr_locs.as_array.tolist(), block.dtype.name)
1429-
1430-
self_blocks = sorted(self.blocks, key=canonicalize)
1431-
other_blocks = sorted(other.blocks, key=canonicalize)
1432-
return all(
1433-
block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks)
1434-
)
1421+
if self.ndim == 1:
1422+
# For SingleBlockManager (i.e. Series)
1423+
if other.ndim != 1:
1424+
return False
1425+
left = self.blocks[0].values
1426+
right = other.blocks[0].values
1427+
if not is_dtype_equal(left.dtype, right.dtype):
1428+
return False
1429+
elif isinstance(left, ExtensionArray):
1430+
return left.equals(right)
1431+
else:
1432+
return array_equivalent(left, right)
1433+
1434+
for i in range(len(self.items)):
1435+
# Check column-wise, return False if any column doesn't match
1436+
left = self.iget_values(i)
1437+
right = other.iget_values(i)
1438+
if not is_dtype_equal(left.dtype, right.dtype):
1439+
return False
1440+
elif isinstance(left, ExtensionArray):
1441+
if not left.equals(right):
1442+
return False
1443+
else:
1444+
if not array_equivalent(left, right):
1445+
return False
1446+
return True
14351447

14361448
def unstack(self, unstacker, fill_value) -> "BlockManager":
14371449
"""

pandas/tests/internals/test_internals.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ def test_copy(self, mgr):
377377
for blk, cp_blk in zip(mgr.blocks, cp.blocks):
378378

379379
# view assertion
380-
assert cp_blk.equals(blk)
380+
tm.assert_equal(cp_blk.values, blk.values)
381381
if isinstance(blk.values, np.ndarray):
382382
assert cp_blk.values.base is blk.values.base
383383
else:
@@ -389,7 +389,7 @@ def test_copy(self, mgr):
389389

390390
# copy assertion we either have a None for a base or in case of
391391
# some blocks it is an array (e.g. datetimetz), but was copied
392-
assert cp_blk.equals(blk)
392+
tm.assert_equal(cp_blk.values, blk.values)
393393
if not isinstance(cp_blk.values, np.ndarray):
394394
assert cp_blk.values._data.base is not blk.values._data.base
395395
else:

0 commit comments

Comments
 (0)