Skip to content

Commit b7194ab

Browse files
author
Tom Augspurger
committed
BUG/TST: sorting of NaNs on sym_diff
1 parent 7e2f7bc commit b7194ab

File tree

3 files changed

+12
-6
lines changed

3 files changed

+12
-6
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ Bug Fixes
169169
- Bug in :meth:`DataFrame.replace` where nested dicts were erroneously
170170
depending on the order of dictionary keys and values (:issue:`5338`).
171171
- Perf issue in concatting with empty objects (:issue:`3259`)
172+
- Clarify sorting of ``sym_diff`` on ``Index`` objects containing ``NaN`` values (:issue:`6444`)
172173
173174
pandas 0.13.1
174175
-------------

pandas/core/index.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,9 @@ def sym_diff(self, other, result_name=None):
10451045
``idx2`` but not both. Equivalent to the Index created by
10461046
``(idx1 - idx2) + (idx2 - idx1)`` with duplicates dropped.
10471047
1048+
The sorting of a result containing ``NaN``s is not guaranteed
1049+
across Python versions. See GitHub issue #6444.
1050+
10481051
Examples
10491052
--------
10501053
>>> idx1 = Index([1, 2, 3, 4])
@@ -1067,7 +1070,6 @@ def sym_diff(self, other, result_name=None):
10671070
the_diff = sorted(set((self - other) + (other - self)))
10681071
return Index(the_diff, name=result_name)
10691072

1070-
10711073
def unique(self):
10721074
"""
10731075
Return array of unique values in the Index. Significantly faster than

pandas/tests/test_index.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -493,13 +493,16 @@ def test_symmetric_diff(self):
493493
self.assert_(tm.equalContents(result, expected))
494494

495495
# nans:
496-
idx1 = Index([1, 2, np.nan])
496+
# GH #6444, sorting of nans. Make sure the number of nans is right
497+
# and the correct non-nan values are there. punt on sorting.
498+
idx1 = Index([1, 2, 3, np.nan])
497499
idx2 = Index([0, 1, np.nan])
498500
result = idx1.sym_diff(idx2)
499-
expected = Index([0.0, np.nan, 2.0, np.nan]) # oddness with nans
500-
nans = pd.isnull(expected)
501-
self.assert_(pd.isnull(result[nans]).all())
502-
self.assert_(tm.equalContents(result[~nans], expected[~nans]))
501+
# expected = Index([0.0, np.nan, 2.0, 3.0, np.nan])
502+
nans = pd.isnull(result)
503+
self.assertEqual(nans.sum(), 2)
504+
self.assertEqual((~nans).sum(), 3)
505+
[self.assertIn(x, result) for x in [0.0, 2.0, 3.0]]
503506

504507
# other not an Index:
505508
idx1 = Index([1, 2, 3, 4], name='idx1')

0 commit comments

Comments
 (0)