diff --git a/pandas/src/klib/khash_python.h b/pandas/src/klib/khash_python.h index d3ef48de0f831..cdd94b5d8522f 100644 --- a/pandas/src/klib/khash_python.h +++ b/pandas/src/klib/khash_python.h @@ -5,7 +5,7 @@ // kludge #define kh_float64_hash_func _Py_HashDouble -#define kh_float64_hash_equal kh_int64_hash_equal +#define kh_float64_hash_equal(a, b) ((a) == (b) || ((b) != (b) && (a) != (a))) #define KHASH_MAP_INIT_FLOAT64(name, khval_t) \ KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, kh_float64_hash_equal) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 61bfeb6631d68..b91c46377267a 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -674,6 +674,16 @@ def test_duplicated_drop_duplicates(self): s.drop_duplicates(inplace=True) tm.assert_series_equal(s, original) + +class TestFloat64HashTable(tm.TestCase): + def test_lookup_nan(self): + from pandas.hashtable import Float64HashTable + xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3]) + m = Float64HashTable() + m.map_locations(xs) + self.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs))) + + if __name__ == '__main__': import nose diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index f7c91501b683b..c29b3b469050f 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -9263,6 +9263,16 @@ def test_reindex(self): assert_frame_equal(result,self.frame) self.assertFalse(result is self.frame) + def test_reindex_nan(self): + df = pd.DataFrame([[1, 2], [3, 5], [7, 11], [9, 23]], + index=[2, np.nan, 1, 5], columns=['joe', 'jim']) + + i, j = [np.nan, 5, 5, np.nan, 1, 2, np.nan], [1, 3, 3, 1, 2, 0, 1] + tm.assert_frame_equal(df.reindex(i), df.iloc[j]) + + df.index = df.index.astype('object') + tm.assert_frame_equal(df.reindex(i), df.iloc[j]) + def test_reindex_name_remains(self): s = Series(random.rand(10)) df = DataFrame(s, index=np.arange(len(s))) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 75c28681ecde5..79b33c6f78c83 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -740,7 +740,7 @@ def test_symmetric_diff(self): # expected = Index([0.0, np.nan, 2.0, 3.0, np.nan]) nans = pd.isnull(result) - self.assertEqual(nans.sum(), 2) + self.assertEqual(nans.sum(), 1) self.assertEqual((~nans).sum(), 3) [self.assertIn(x, result) for x in [0.0, 2.0, 3.0]] diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index fc399eec39376..5f1cad11f72fe 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5846,6 +5846,15 @@ def test_reindex(self): result = self.ts.reindex() self.assertFalse((result is self.ts)) + def test_reindex_nan(self): + ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8]) + + i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2] + assert_series_equal(ts.reindex(i), ts.iloc[j]) + + ts.index = ts.index.astype('object') + assert_series_equal(ts.reindex(i), ts.iloc[j]) + def test_reindex_corner(self): # (don't forget to fix this) I think it's fixed reindexed_dep = self.empty.reindex(self.ts.index, method='pad') diff --git a/setup.py b/setup.py index 4b2e8c6e01889..524df51190ce3 100755 --- a/setup.py +++ b/setup.py @@ -456,7 +456,8 @@ def pxd(name): 'pxdfiles': [], 'depends': lib_depends}, hashtable={'pyxfile': 'hashtable', - 'pxdfiles': ['hashtable']}, + 'pxdfiles': ['hashtable'], + 'depends': ['pandas/src/klib/khash_python.h']}, tslib={'pyxfile': 'tslib', 'depends': tseries_depends, 'sources': ['pandas/src/datetime/np_datetime.c',