Skip to content

Commit c9f2b3f

Browse files
committed
BUG: reindexing a frame fails to pick up NaN values
1 parent 0d35dd4 commit c9f2b3f

File tree

6 files changed

+33
-3
lines changed

6 files changed

+33
-3
lines changed

pandas/src/klib/khash_python.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
// kludge
66

77
#define kh_float64_hash_func _Py_HashDouble
8-
#define kh_float64_hash_equal kh_int64_hash_equal
8+
#define kh_float64_hash_equal(a, b) ((a) == (b) || ((b) != (b) && (a) != (a)))
99

1010
#define KHASH_MAP_INIT_FLOAT64(name, khval_t) \
1111
KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, kh_float64_hash_equal)

pandas/tests/test_base.py

+10
Original file line number | Diff line number | Diff line change
@@ -674,6 +674,16 @@ def test_duplicated_drop_duplicates(self):
674674
s.drop_duplicates(inplace=True)
675675
tm.assert_series_equal(s, original)
676676

677+
678+
class TestFloat64HashTable(tm.TestCase):
679+
def test_lookup_nan(self):
680+
from pandas.hashtable import Float64HashTable
681+
xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3])
682+
m = Float64HashTable()
683+
m.map_locations(xs)
684+
self.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs)))
685+
686+
677687
if __name__ == '__main__':
678688
import nose
679689

pandas/tests/test_frame.py

+10
Original file line number | Diff line number | Diff line change
@@ -9263,6 +9263,16 @@ def test_reindex(self):
92639263
assert_frame_equal(result,self.frame)
92649264
self.assertFalse(result is self.frame)
92659265

9266+
def test_reindex_nan(self):
9267+
df = pd.DataFrame([[1, 2], [3, 5], [7, 11], [9, 23]],
9268+
index=[2, np.nan, 1, 5], columns=['joe', 'jim'])
9269+
9270+
i, j = [np.nan, 5, 5, np.nan, 1, 2, np.nan], [1, 3, 3, 1, 2, 0, 1]
9271+
tm.assert_frame_equal(df.reindex(i), df.iloc[j])
9272+
9273+
df.index = df.index.astype('object')
9274+
tm.assert_frame_equal(df.reindex(i), df.iloc[j])
9275+
92669276
def test_reindex_name_remains(self):
92679277
s = Series(random.rand(10))
92689278
df = DataFrame(s, index=np.arange(len(s)))

pandas/tests/test_index.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -740,7 +740,7 @@ def test_symmetric_diff(self):
740740
# expected = Index([0.0, np.nan, 2.0, 3.0, np.nan])
741741

742742
nans = pd.isnull(result)
743-
self.assertEqual(nans.sum(), 2)
743+
self.assertEqual(nans.sum(), 1)
744744
self.assertEqual((~nans).sum(), 3)
745745
[self.assertIn(x, result) for x in [0.0, 2.0, 3.0]]
746746

pandas/tests/test_series.py

+9
Original file line number | Diff line number | Diff line change
@@ -5846,6 +5846,15 @@ def test_reindex(self):
58465846
result = self.ts.reindex()
58475847
self.assertFalse((result is self.ts))
58485848

5849+
def test_reindex_nan(self):
5850+
ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])
5851+
5852+
i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2]
5853+
assert_series_equal(ts.reindex(i), ts.iloc[j])
5854+
5855+
ts.index = ts.index.astype('object')
5856+
assert_series_equal(ts.reindex(i), ts.iloc[j])
5857+
58495858
def test_reindex_corner(self):
58505859
# (don't forget to fix this) I think it's fixed
58515860
reindexed_dep = self.empty.reindex(self.ts.index, method='pad')

setup.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -456,7 +456,8 @@ def pxd(name):
456456
'pxdfiles': [],
457457
'depends': lib_depends},
458458
hashtable={'pyxfile': 'hashtable',
459-
'pxdfiles': ['hashtable']},
459+
'pxdfiles': ['hashtable'],
460+
'depends': ['pandas/src/klib/khash_python.h']},
460461
tslib={'pyxfile': 'tslib',
461462
'depends': tseries_depends,
462463
'sources': ['pandas/src/datetime/np_datetime.c',

0 commit comments

Comments
 (0)