Skip to content

REF: dont consolidate in BlockManager.equals #34962

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 1 addition & 26 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,7 @@
ABCPandasArray,
ABCSeries,
)
from pandas.core.dtypes.missing import (
_isna_compat,
array_equivalent,
is_valid_nat_for_dtype,
isna,
)
from pandas.core.dtypes.missing import _isna_compat, is_valid_nat_for_dtype, isna

import pandas.core.algorithms as algos
from pandas.core.array_algos.transforms import shift
Expand Down Expand Up @@ -1367,11 +1362,6 @@ def where_func(cond, values, other):

return result_blocks

def equals(self, other) -> bool:
if self.dtype != other.dtype or self.shape != other.shape:
return False
return array_equivalent(self.values, other.values)

def _unstack(self, unstacker, fill_value, new_placement):
"""
Return a list of unstacked blocks of self
Expand Down Expand Up @@ -1865,9 +1855,6 @@ def where(

return [self.make_block_same_class(result, placement=self.mgr_locs)]

def equals(self, other) -> bool:
return self.values.equals(other.values)

def _unstack(self, unstacker, fill_value, new_placement):
# ExtensionArray-safe unstack.
# We override ObjectBlock._unstack, which unstacks directly on the
Expand Down Expand Up @@ -1913,12 +1900,6 @@ class NumericBlock(Block):
class FloatOrComplexBlock(NumericBlock):
__slots__ = ()

def equals(self, other) -> bool:
if self.dtype != other.dtype or self.shape != other.shape:
return False
left, right = self.values, other.values
return ((left == right) | (np.isnan(left) & np.isnan(right))).all()


class FloatBlock(FloatOrComplexBlock):
__slots__ = ()
Expand Down Expand Up @@ -2282,12 +2263,6 @@ def setitem(self, indexer, value):
)
return newb.setitem(indexer, value)

def equals(self, other) -> bool:
# override for significant performance improvement
if self.dtype != other.dtype or self.shape != other.shape:
return False
return (self.values.view("i8") == other.values.view("i8")).all()

def quantile(self, qs, interpolation="linear", axis=0):
naive = self.values.view("M8[ns]")

Expand Down
48 changes: 30 additions & 18 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas.core.dtypes.common import (
DT64NS_DTYPE,
is_datetimelike_v_numeric,
is_dtype_equal,
is_extension_array_dtype,
is_list_like,
is_numeric_v_string_like,
Expand All @@ -27,9 +28,10 @@
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.missing import array_equivalent, isna

import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.base import PandasObject
import pandas.core.common as com
Expand Down Expand Up @@ -1409,29 +1411,39 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True
new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
)

def equals(self, other) -> bool:
def equals(self, other: "BlockManager") -> bool:
self_axes, other_axes = self.axes, other.axes
if len(self_axes) != len(other_axes):
return False
if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
return False
self._consolidate_inplace()
other._consolidate_inplace()
if len(self.blocks) != len(other.blocks):
return False

# canonicalize block order, using a tuple combining the mgr_locs
# then type name because there might be unconsolidated
# blocks (say, Categorical) which can only be distinguished by
# the iteration order
def canonicalize(block):
return (block.mgr_locs.as_array.tolist(), block.dtype.name)

self_blocks = sorted(self.blocks, key=canonicalize)
other_blocks = sorted(other.blocks, key=canonicalize)
return all(
block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks)
)
if self.ndim == 1:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you give some comments here (or at least a comment about the ordering of the checks)

# For SingleBlockManager (i.e.Series)
if other.ndim != 1:
return False
left = self.blocks[0].values
right = other.blocks[0].values
if not is_dtype_equal(left.dtype, right.dtype):
return False
elif isinstance(left, ExtensionArray):
return left.equals(right)
else:
return array_equivalent(left, right)

for i in range(len(self.items)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment added

# Check column-wise, return False if any column doesnt match
left = self.iget_values(i)
right = other.iget_values(i)
if not is_dtype_equal(left.dtype, right.dtype):
return False
elif isinstance(left, ExtensionArray):
if not left.equals(right):
return False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this can be return left.equals(right) ?

And same for array_equivalent below?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, will update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, wait, not here. this is an "all" check, so we only return on False.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yes, it's inside the for loop ..

else:
if not array_equivalent(left, right):
return False
return True

def unstack(self, unstacker, fill_value) -> "BlockManager":
"""
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ def test_copy(self, mgr):
for blk, cp_blk in zip(mgr.blocks, cp.blocks):

# view assertion
assert cp_blk.equals(blk)
tm.assert_equal(cp_blk.values, blk.values)
if isinstance(blk.values, np.ndarray):
assert cp_blk.values.base is blk.values.base
else:
Expand All @@ -389,7 +389,7 @@ def test_copy(self, mgr):

# copy assertion we either have a None for a base or in case of
# some blocks it is an array (e.g. datetimetz), but was copied
assert cp_blk.equals(blk)
tm.assert_equal(cp_blk.values, blk.values)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should equals also work here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, Block.equals is removed

if not isinstance(cp_blk.values, np.ndarray):
assert cp_blk.values._data.base is not blk.values._data.base
else:
Expand Down