diff --git a/doc/source/basics.rst b/doc/source/basics.rst index bb65312f053f3..49959b1e4270f 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -317,6 +317,16 @@ locations treated as equal. (df+df).equals(df*2) +Note that the Series or DataFrame index needs to be in the same order for +equality to be True: + +.. ipython:: python + + df = DataFrame({'col':['foo', 0, np.nan]}) + df2 = DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0]) + df.equals(df2) + df.equals(df2.sort()) + Combining overlapping data sets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.13.1.txt b/doc/source/v0.13.1.txt index b48f555f9691a..64ca1612f00c1 100644 --- a/doc/source/v0.13.1.txt +++ b/doc/source/v0.13.1.txt @@ -121,9 +121,10 @@ API changes .. ipython:: python - df = DataFrame({'col':['foo', 0, np.nan]}).sort() + df = DataFrame({'col':['foo', 0, np.nan]}) df2 = DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0]) - df.equals(df) + df.equals(df2) + df.equals(df2.sort()) import pandas.core.common as com com.array_equivalent(np.array([0, np.nan]), np.array([0, np.nan])) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index d972edeb2bbb3..af47ee878b1c3 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -1110,3 +1110,4 @@ Bug Fixes - Regression in ``NDFrame.loc`` indexing when rows/columns were converted to Float64Index if target was an empty list/ndarray (:issue:`7774`) - Bug in ``Series`` that allows it to be indexed by a ``DataFrame`` which has unexpected results. 
Such indexing is no longer permitted (:issue:`8444`) - Bug in item assignment of a ``DataFrame`` with multi-index columns where right-hand-side columns were not aligned (:issue:`7655`) +- Suppress FutureWarning generated by NumPy when comparing object arrays containing NaN for equality (:issue:`7065`) diff --git a/pandas/core/common.py b/pandas/core/common.py index a3698c569b8b3..da512dc56eaef 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -406,6 +406,7 @@ def array_equivalent(left, right, strict_nan=False): >>> array_equivalent(np.array([1, nan, 2]), np.array([1, 2, nan])) False """ + left, right = np.asarray(left), np.asarray(right) if left.shape != right.shape: return False @@ -414,8 +415,8 @@ def array_equivalent(left, right, strict_nan=False): if not strict_nan: # pd.isnull considers NaN and None to be equivalent. - return ((left == right) | (pd.isnull(left) & pd.isnull(right))).all() - + return lib.array_equivalent_object(left.ravel(), right.ravel()) + for left_value, right_value in zip(left, right): if left_value is tslib.NaT and right_value is not tslib.NaT: return False @@ -426,7 +427,6 @@ def array_equivalent(left, right, strict_nan=False): else: if left_value != right_value: return False - return True # NaNs can occur in float and complex arrays. diff --git a/pandas/core/index.py b/pandas/core/index.py index f87b7e982b332..b9f1a06b171ed 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1047,7 +1047,7 @@ def equals(self, other): if type(other) != Index: return other.equals(self) - return array_equivalent(self, other) + return array_equivalent(_values_from_object(self), _values_from_object(other)) def identical(self, other): """Similar to equals, but check that other comparable attributes are @@ -2260,7 +2260,7 @@ def equals(self, other): # return False try: - return array_equivalent(self, other) + return array_equivalent(_values_from_object(self), _values_from_object(other)) except TypeError: # e.g. 
fails in numpy 1.6 with DatetimeIndex #1681 return False @@ -4175,7 +4175,8 @@ def equals(self, other): return True if not isinstance(other, MultiIndex): - return array_equivalent(self.values, _ensure_index(other)) + return array_equivalent(self.values, + _values_from_object(_ensure_index(other))) if self.nlevels != other.nlevels: return False diff --git a/pandas/lib.pyx b/pandas/lib.pyx index 7a90072b2410e..a845b9c90865b 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -330,6 +330,26 @@ def list_to_object_array(list obj): return arr +@cython.wraparound(False) +@cython.boundscheck(False) +def array_equivalent_object(ndarray left, ndarray right): + cdef Py_ssize_t i, n + cdef object lobj, robj + + n = len(left) + for i from 0 <= i < n: + lobj = left[i] + robj = right[i] + + # equal pairs (including None/None, as None != None is False) pass; + # a mismatch is tolerated only when checknull flags both values + if lobj != robj: + if checknull(lobj) and checknull(robj): + continue + return False + return True + + @cython.wraparound(False) @cython.boundscheck(False) def fast_unique(ndarray[object] values):