-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
API: Add equals method to NDFrames. #5283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -277,6 +277,42 @@ def notnull(obj): | |
return -res | ||
|
||
|
||
def array_equivalent(left, right):
    """
    True if two arrays, left and right, have equal non-NaN elements, and NaNs
    in corresponding locations. False otherwise. It is assumed that left and
    right have the same dtype. The behavior of this function (particularly
    with respect to NaNs) is not defined if the dtypes are different.

    Parameters
    ----------
    left, right : array_like
        Input arrays. Inputs that are not already NumPy arrays (e.g. lists)
        are converted with ``np.asarray`` before comparison.

    Returns
    -------
    b : bool
        Returns True if the arrays are equivalent.

    Examples
    --------
    >>> array_equivalent([1, 2, np.nan], np.array([1, 2, np.nan]))
    True
    >>> array_equivalent([1, np.nan, 2], [1, 2, np.nan])
    False
    """
    # NOTE(review): per the PR discussion, the goal was to eventually use this
    # for Index comparisons as well; that was deferred for now.

    # Coerce first so that plain sequences, which have no ``shape`` or
    # ``dtype`` attributes, are accepted as the docstring promises.
    left, right = np.asarray(left), np.asarray(right)
    if left.shape != right.shape:
        return False

    # NaNs occur only in object arrays, float or complex arrays.
    if left.dtype == np.object_:
        # If object array, we need to use pd.isnull
        return ((left == right) | (pd.isnull(left) & pd.isnull(right))).all()
    elif not issubclass(left.dtype.type, (np.floating, np.complexfloating)):
        # if not a float or complex array, then there are no NaNs
        return np.array_equal(left, right)

    # For float or complex arrays, using np.isnan is faster than pd.isnull
    return ((left == right) | (np.isnan(left) & np.isnan(right))).all()
|
||
def _iterable_not_string(x):
    """Return True if ``x`` is iterable but is not one of the string types."""
    # ``collections.Iterable`` was removed in Python 3.10. The ABC lives in
    # ``collections.abc`` on Python 3; fall back to ``collections`` only on
    # interpreters that lack that submodule (Python 2).
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable
    return (isinstance(x, Iterable) and
            not isinstance(x, compat.string_types))
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1121,13 +1121,24 @@ def func(c, v, o): | |
|
||
return result_blocks | ||
|
||
def equals(self, other):
    """Return True if ``other`` has the same dtype, shape and values.

    The dtype and shape are compared first; only when both match are the
    underlying ``values`` compared with ``np.array_equal``.
    """
    if self.dtype != other.dtype:
        return False
    if self.shape != other.shape:
        return False
    mine, theirs = self.values, other.values
    return np.array_equal(mine, theirs)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just want to record this here, since my thoughts on this matter have been so muddled:
so object arrays do not need to be special-cased,
Thankfully,
or can
so |
||
|
||
|
||
class NumericBlock(Block):
    """Block subclass that sets the ``is_numeric`` and ``_can_hold_na`` flags."""

    _can_hold_na = True
    is_numeric = True
|
||
|
||
class FloatBlock(NumericBlock): | ||
class FloatOrComplexBlock(NumericBlock):
    """Common base of the float and complex blocks; adds a NaN-aware ``equals``."""

    def equals(self, other):
        """Return True if ``other`` matches in dtype, shape and values.

        Two positions count as equal when the values compare equal or when
        both are NaN.
        """
        if self.dtype != other.dtype:
            return False
        if self.shape != other.shape:
            return False
        mine, theirs = self.values, other.values
        both_nan = np.isnan(mine) & np.isnan(theirs)
        return (both_nan | (mine == theirs)).all()
|
||
|
||
class FloatBlock(FloatOrComplexBlock): | ||
is_float = True | ||
_downcast_dtype = 'int64' | ||
|
||
|
@@ -1166,8 +1177,7 @@ def should_store(self, value): | |
return (issubclass(value.dtype.type, np.floating) and | ||
value.dtype == self.dtype) | ||
|
||
|
||
class ComplexBlock(NumericBlock): | ||
class ComplexBlock(FloatOrComplexBlock): | ||
is_complex = True | ||
|
||
def _can_hold_element(self, element): | ||
|
@@ -2563,7 +2573,7 @@ def get_data(self, copy=False, columns=None, **kwargs): | |
return self.combine(blocks) | ||
|
||
def combine(self, blocks): | ||
""" reutrn a new manager with the blocks """ | ||
""" return a new manager with the blocks """ | ||
indexer = np.sort(np.concatenate([b.ref_locs for b in blocks])) | ||
new_items = self.items.take(indexer) | ||
|
||
|
@@ -3491,6 +3501,16 @@ def item_dtypes(self): | |
raise AssertionError('Some items were not in any block') | ||
return result | ||
|
||
def equals(self, other):
    """Return True if ``other`` has equal axes and pairwise-equal blocks.

    The axes are compared first (count, then each pair via ``equals``).
    Both managers are then consolidated in place -- note that this mutates
    ``self`` and ``other`` -- and their blocks are compared pairwise in
    order via each block's ``equals`` method.
    """
    self_axes, other_axes = self.axes, other.axes
    if len(self_axes) != len(other_axes):
        return False
    if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
        return False
    self._consolidate_inplace()
    other._consolidate_inplace()
    # ``zip`` stops at the shorter sequence, so without this guard a manager
    # with fewer (or no) blocks would compare equal to one with more.
    if len(self.blocks) != len(other.blocks):
        return False
    return all(block.equals(oblock)
               for block, oblock in zip(self.blocks, other.blocks))
|
||
class SingleBlockManager(BlockManager): | ||
|
||
|
@@ -4004,6 +4024,9 @@ def _merge_blocks(blocks, items, dtype=None, _can_consolidate=True): | |
raise AssertionError("_merge_blocks are invalid!") | ||
dtype = blocks[0].dtype | ||
|
||
if not items.is_unique: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The example you gave did indeed break the code. I've added your example to test_internals.py and am handling this case by sorting the blocks according to their `ref_locs`. |
||
blocks = sorted(blocks, key=lambda b: b.ref_locs.tolist()) | ||
|
||
new_values = _vstack([b.values for b in blocks], dtype) | ||
new_items = blocks[0].items.append([b.items for b in blocks[1:]]) | ||
new_block = make_block(new_values, new_items, items) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,11 +5,10 @@ | |
from nose.tools import assert_equal | ||
import numpy as np | ||
from pandas.tslib import iNaT, NaT | ||
|
||
from pandas import Series, DataFrame, date_range, DatetimeIndex, Timestamp | ||
from pandas import compat | ||
from pandas.compat import range, long, lrange, lmap, u | ||
from pandas.core.common import notnull, isnull | ||
from pandas.core.common import notnull, isnull, array_equivalent | ||
import pandas.core.common as com | ||
import pandas.util.testing as tm | ||
import pandas.core.config as cf | ||
|
@@ -167,6 +166,19 @@ def test_downcast_conv(): | |
result = com._possibly_downcast_to_dtype(arr,'infer') | ||
tm.assert_almost_equal(result, expected) | ||
|
||
|
||
def test_array_equivalent():
    """Check array_equivalent on float, object, complex and string arrays."""
    nan = np.nan

    # Pairs that must compare as equivalent (NaNs in matching positions).
    equivalent_pairs = [
        (np.array([nan, nan]), np.array([nan, nan])),
        (np.array([nan, 1, nan]), np.array([nan, 1, nan])),
        (np.array([nan, None], dtype='object'),
         np.array([nan, None], dtype='object')),
        (np.array([nan, 1+1j], dtype='complex'),
         np.array([nan, 1+1j], dtype='complex')),
    ]
    for lhs, rhs in equivalent_pairs:
        assert array_equivalent(lhs, rhs)

    # Pairs that differ in a non-NaN element or in shape.
    different_pairs = [
        (np.array([nan, 1+1j], dtype='complex'),
         np.array([nan, 1+2j], dtype='complex')),
        (np.array([nan, 1, nan]), np.array([nan, 2, nan])),
        (np.array(['a', 'b', 'c', 'd']), np.array(['e', 'e'])),
    ]
    for lhs, rhs in different_pairs:
        assert not array_equivalent(lhs, rhs)
|
||
def test_datetimeindex_from_empty_datetime64_array(): | ||
for unit in [ 'ms', 'us', 'ns' ]: | ||
idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I merged this, thanks! Maybe as a small follow-up: can you explain in the docs why one would need to do this? A small example may be in order.