diff --git a/doc/source/release.rst b/doc/source/release.rst index 5240c67fc42d8..1e1fe4f52a73f 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -169,6 +169,7 @@ Bug Fixes - Bug in :meth:`DataFrame.replace` where nested dicts were erroneously depending on the order of dictionary keys and values (:issue:`5338`). - Perf issue in concatting with empty objects (:issue:`3259`) +- Clarify sorting of ``sym_diff`` on ``Index`` objects with ``NaN`` values (:issue:`6444`) pandas 0.13.1 ------------- diff --git a/pandas/core/index.py b/pandas/core/index.py index 3d821f37e41b5..6c45fccda12ab 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1045,6 +1045,9 @@ def sym_diff(self, other, result_name=None): ``idx2`` but not both. Equivalent to the Index created by ``(idx1 - idx2) + (idx2 - idx1)`` with duplicates dropped. + The sorting of a result containing ``NaN`` values is not guaranteed + across Python versions. See GitHub issue #6444. + Examples -------- >>> idx1 = Index([1, 2, 3, 4]) @@ -1067,7 +1070,6 @@ def sym_diff(self, other, result_name=None): the_diff = sorted(set((self - other) + (other - self))) return Index(the_diff, name=result_name) - def unique(self): """ Return array of unique values in the Index. Significantly faster than diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index d8625c8687f79..e828bc100dfcf 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -493,13 +493,16 @@ def test_symmetric_diff(self): self.assert_(tm.equalContents(result, expected)) # nans: - idx1 = Index([1, 2, np.nan]) + # GH #6444, sorting of nans. Make sure the number of nans is right + # and the correct non-nan values are there. punt on sorting. 
+ idx1 = Index([1, 2, 3, np.nan]) idx2 = Index([0, 1, np.nan]) result = idx1.sym_diff(idx2) - expected = Index([0.0, np.nan, 2.0, np.nan]) # oddness with nans - nans = pd.isnull(expected) - self.assert_(pd.isnull(result[nans]).all()) - self.assert_(tm.equalContents(result[~nans], expected[~nans])) + # expected = Index([0.0, np.nan, 2.0, 3.0, np.nan]) + nans = pd.isnull(result) + self.assertEqual(nans.sum(), 2) + self.assertEqual((~nans).sum(), 3) + [self.assertIn(x, result) for x in [0.0, 2.0, 3.0]] # other not an Index: idx1 = Index([1, 2, 3, 4], name='idx1')