File tree 3 files changed +13
-2
lines changed
3 files changed +13
-2
lines changed Original file line number Diff line number Diff line change @@ -515,7 +515,7 @@ def setup(self):
515
515
tmp1 = (np .random .random (10000 ) * 0.1 ).astype (np .float32 )
516
516
tmp2 = (np .random .random (10000 ) * 10.0 ).astype (np .float32 )
517
517
tmp = np .concatenate ((tmp1 , tmp2 ))
518
- arr = np .repeat (tmp , 100 )
518
+ arr = np .repeat (tmp , 10 )
519
519
self .df = DataFrame (dict (a = arr , b = arr ))
520
520
521
521
def time_groupby_sum (self ):
Original file line number Diff line number Diff line change @@ -493,7 +493,7 @@ class float_loc(object):
493
493
goal_time = 0.2
494
494
495
495
def setup (self ):
496
- a = np .arange (1000000 )
496
+ a = np .arange (100000 )
497
497
self .ind = pd .Float64Index (a * 4.8000000418824129e-08 )
498
498
499
499
def time_float_loc (self ):
Original file line number Diff line number Diff line change 2
2
3
3
#include "khash.h"
4
4
5
+ // Previously we were using the built-in CPython hash function for doubles
6
+ // python 2.7 https://github.com/python/cpython/blob/2.7/Objects/object.c#L1021
7
+ // python 3.5 https://github.com/python/cpython/blob/3.5/Python/pyhash.c#L85
8
+
9
+ // The python 3 hash function has the invariant hash(x) == hash(int(x)) == hash(decimal(x))
10
+ // and the size of hash may be different by platform / version (long in py2, Py_ssize_t in py3).
11
+ // We don't need those invariants because types will be cast before hashing, and if Py_ssize_t
12
+ // is 64 bits the truncation causes collision issues. Given all that, we use our own
13
+ // simple hash, viewing the double bytes as an int64 and using khash's default
14
+ // hash for 64 bit integers.
15
+ // GH 13436
5
16
// Returns the bits of the double `key` reinterpreted as a khint64_t, so the
// value can be hashed as an integer instead of as a floating-point number.
// NOTE(review): the value is read through a khint64_t pointer cast applied to
// the address of a double; this form of type punning may conflict with
// strict-aliasing rules — confirm compiler flags or consider memcpy.
inline khint64_t asint64 (double key ) {
6
17
return * (khint64_t * )(& key );
7
18
}
You can’t perform that action at this time.
0 commit comments