-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF: dont consolidate in BlockManager.equals #34962
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3e6dc47
16ad392
5559ef5
8446751
135d9ab
dc26851
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
from pandas.core.dtypes.common import ( | ||
DT64NS_DTYPE, | ||
is_datetimelike_v_numeric, | ||
is_dtype_equal, | ||
is_extension_array_dtype, | ||
is_list_like, | ||
is_numeric_v_string_like, | ||
|
@@ -27,9 +28,10 @@ | |
from pandas.core.dtypes.concat import concat_compat | ||
from pandas.core.dtypes.dtypes import ExtensionDtype | ||
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries | ||
from pandas.core.dtypes.missing import isna | ||
from pandas.core.dtypes.missing import array_equivalent, isna | ||
|
||
import pandas.core.algorithms as algos | ||
from pandas.core.arrays import ExtensionArray | ||
from pandas.core.arrays.sparse import SparseDtype | ||
from pandas.core.base import PandasObject | ||
import pandas.core.common as com | ||
|
@@ -1409,29 +1411,39 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True | |
new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True | ||
) | ||
|
||
def equals(self, other) -> bool: | ||
def equals(self, other: "BlockManager") -> bool: | ||
self_axes, other_axes = self.axes, other.axes | ||
if len(self_axes) != len(other_axes): | ||
return False | ||
if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): | ||
return False | ||
self._consolidate_inplace() | ||
other._consolidate_inplace() | ||
if len(self.blocks) != len(other.blocks): | ||
return False | ||
|
||
# canonicalize block order, using a tuple combining the mgr_locs | ||
# then type name because there might be unconsolidated | ||
# blocks (say, Categorical) which can only be distinguished by | ||
# the iteration order | ||
def canonicalize(block): | ||
return (block.mgr_locs.as_array.tolist(), block.dtype.name) | ||
|
||
self_blocks = sorted(self.blocks, key=canonicalize) | ||
other_blocks = sorted(other.blocks, key=canonicalize) | ||
return all( | ||
block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks) | ||
) | ||
if self.ndim == 1: | ||
# For SingleBlockManager (i.e.Series) | ||
if other.ndim != 1: | ||
return False | ||
left = self.blocks[0].values | ||
right = other.blocks[0].values | ||
if not is_dtype_equal(left.dtype, right.dtype): | ||
return False | ||
elif isinstance(left, ExtensionArray): | ||
return left.equals(right) | ||
else: | ||
return array_equivalent(left, right) | ||
|
||
for i in range(len(self.items)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment added |
||
# Check column-wise, return False if any column doesnt match | ||
left = self.iget_values(i) | ||
right = other.iget_values(i) | ||
if not is_dtype_equal(left.dtype, right.dtype): | ||
return False | ||
elif isinstance(left, ExtensionArray): | ||
if not left.equals(right): | ||
return False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this can be And same for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, will update There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, wait, not here. this is an "all" check, so we only return on False. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, yes, it's inside the for loop .. |
||
else: | ||
if not array_equivalent(left, right): | ||
return False | ||
return True | ||
|
||
def unstack(self, unstacker, fill_value) -> "BlockManager": | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -377,7 +377,7 @@ def test_copy(self, mgr): | |
for blk, cp_blk in zip(mgr.blocks, cp.blocks): | ||
|
||
# view assertion | ||
assert cp_blk.equals(blk) | ||
tm.assert_equal(cp_blk.values, blk.values) | ||
if isinstance(blk.values, np.ndarray): | ||
assert cp_blk.values.base is blk.values.base | ||
else: | ||
|
@@ -389,7 +389,7 @@ def test_copy(self, mgr): | |
|
||
# copy assertion we either have a None for a base or in case of | ||
# some blocks it is an array (e.g. datetimetz), but was copied | ||
assert cp_blk.equals(blk) | ||
tm.assert_equal(cp_blk.values, blk.values) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should equals also work here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, Block.equals is removed |
||
if not isinstance(cp_blk.values, np.ndarray): | ||
assert cp_blk.values._data.base is not blk.values._data.base | ||
else: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you give some comments here (or at least a comment about the ordering of the checks)