Skip to content

BUG: Suppress FutureWarning when comparing object arrays #8512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 10, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/source/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,16 @@ locations treated as equal.
(df+df).equals(df*2)
Note that the Series or DataFrame index needs to be in the same order for
equality to be True:

.. ipython:: python
df = DataFrame({'col':['foo', 0, np.nan]})
df2 = DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0])
df.equals(df2)
df.equals(df2.sort())
Combining overlapping data sets
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
5 changes: 3 additions & 2 deletions doc/source/v0.13.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,10 @@ API changes

.. ipython:: python

df = DataFrame({'col':['foo', 0, np.nan]}).sort()
df = DataFrame({'col':['foo', 0, np.nan]})
df2 = DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0])
df.equals(df)
df.equals(df2)
df.equals(df2.sort())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, this is fine

is there an example in basics.rst as well?


import pandas.core.common as com
com.array_equivalent(np.array([0, np.nan]), np.array([0, np.nan]))
Expand Down
1 change: 1 addition & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1110,3 +1110,4 @@ Bug Fixes
- Regression in ``NDFrame.loc`` indexing when rows/columns were converted to Float64Index if target was an empty list/ndarray (:issue:`7774`)
- Bug in ``Series`` that allows it to be indexed by a ``DataFrame`` which has unexpected results. Such indexing is no longer permitted (:issue:`8444`)
- Bug in item assignment of a ``DataFrame`` with multi-index columns where right-hand-side columns were not aligned (:issue:`7655`)
- Suppress FutureWarning generated by NumPy when comparing object arrays containing NaN for equality (:issue:`7065`)
6 changes: 3 additions & 3 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ def array_equivalent(left, right, strict_nan=False):
>>> array_equivalent(np.array([1, nan, 2]), np.array([1, 2, nan]))
False
"""

left, right = np.asarray(left), np.asarray(right)
if left.shape != right.shape: return False

Expand All @@ -414,8 +415,8 @@ def array_equivalent(left, right, strict_nan=False):

if not strict_nan:
# pd.isnull considers NaN and None to be equivalent.
return ((left == right) | (pd.isnull(left) & pd.isnull(right))).all()

return lib.array_equivalent_object(left.ravel(), right.ravel())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here, I think you need to do:

lib.array_equivalent_object(_values_from_object(left).ravel(),_values_from_object(right).ravel())

for left_value, right_value in zip(left, right):
if left_value is tslib.NaT and right_value is not tslib.NaT:
return False
Expand All @@ -426,7 +427,6 @@ def array_equivalent(left, right, strict_nan=False):
else:
if left_value != right_value:
return False

return True

# NaNs can occur in float and complex arrays.
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1047,7 +1047,7 @@ def equals(self, other):
if type(other) != Index:
return other.equals(self)

return array_equivalent(self, other)
return array_equivalent(_values_from_object(self), _values_from_object(other))

def identical(self, other):
"""Similar to equals, but check that other comparable attributes are
Expand Down Expand Up @@ -2260,7 +2260,7 @@ def equals(self, other):
# return False

try:
return array_equivalent(self, other)
return array_equivalent(_values_from_object(self), _values_from_object(other))
except TypeError:
# e.g. fails in numpy 1.6 with DatetimeIndex #1681
return False
Expand Down Expand Up @@ -4175,7 +4175,8 @@ def equals(self, other):
return True

if not isinstance(other, MultiIndex):
return array_equivalent(self.values, _ensure_index(other))
return array_equivalent(self.values,
_values_from_object(_ensure_index(other)))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perfect

if self.nlevels != other.nlevels:
return False
Expand Down
20 changes: 20 additions & 0 deletions pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,26 @@ def list_to_object_array(list obj):
return arr


@cython.wraparound(False)
@cython.boundscheck(False)
def array_equivalent_object(ndarray left, ndarray right):
    """
    Compare two arrays position by position, treating a pair of null
    values as a match.

    Parameters
    ----------
    left, right : ndarray
        Arrays to compare. Positions ``0 .. len(left) - 1`` are read from
        both, so ``right`` presumably has to be at least as long as
        ``left`` (NOTE(review): callers appear to check shapes and ravel
        before calling -- confirm).

    Returns
    -------
    bool
        False as soon as a position holds two values that compare unequal
        and are not both flagged by ``checknull``; True otherwise.
    """
    cdef Py_ssize_t pos, length
    cdef object left_item, right_item

    length = len(left)
    for pos in range(length):
        left_item = left[pos]
        right_item = right[pos]

        # A mismatch is tolerated only when both sides are null according
        # to checknull; the null checks are skipped entirely for values
        # that already compare equal.
        if left_item != right_item and not (checknull(left_item)
                                            and checknull(right_item)):
            return False
    return True


@cython.wraparound(False)
@cython.boundscheck(False)
def fast_unique(ndarray[object] values):
Expand Down