Skip to content

Commit e3204b2

Browse files
committed
adding asv-benchmark hash_functions
1 parent 9cb3723 commit e3204b2

File tree

1 file changed

+164
-0
lines changed

1 file changed

+164
-0
lines changed
+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import numpy as np
2+
3+
import pandas as pd
4+
5+
6+
class IsinAlmostFullWithRandomInt:
    """Benchmark ``Series.isin`` on random integers drawn from ``[0, M)``.

    ``M`` is ``3 * 2 ** (exponent - 2)``, i.e. three quarters of
    ``2 ** exponent``. Both the series and the lookup values hold ``M``
    random draws, cast to the benchmarked ``dtype``.
    """

    # ``object`` replaces the ``np.object`` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    params = [
        [np.float64, np.int64, np.uint64, object],
        range(10, 21),
    ]
    param_names = ["dtype", "exponent"]

    def setup(self, dtype, exponent):
        """Build the series and the two lookup arrays for one parameter set.

        Parameters
        ----------
        dtype : type
            Element type the random integers are cast to.
        exponent : int
            Controls the number of elements, ``M = 3 * 2 ** (exponent - 2)``.
        """
        M = 3 * 2 ** (exponent - 2)
        # 0.77-the maximal share of occupied buckets
        # Fixed seed so every run benchmarks the same data.
        np.random.seed(42)
        self.s = pd.Series(np.random.randint(0, M, M)).astype(dtype)
        self.values = np.random.randint(0, M, M).astype(dtype)
        # Shifting by M moves every lookup value out of [0, M), so none of
        # them can match an element of the series.
        self.values_outside = self.values + M

    def time_isin(self, dtype, exponent):
        """Time ``isin`` against values drawn from the same range."""
        self.s.isin(self.values)

    def time_isin_outside(self, dtype, exponent):
        """Time ``isin`` against values that lie outside the series range."""
        self.s.isin(self.values_outside)
26+
27+
28+
class IsinWithRandomFloat:
    """Benchmark ``Series.isin`` on ``M`` random floats from ``np.random.rand``.

    The lookup values are the same numbers as the series, in shuffled order;
    ``values_outside`` is that array shifted by ``0.1``.
    """

    # ``object`` replaces the ``np.object`` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    params = [
        [np.float64, object],
        [
            1_300,
            2_000,
            7_000,
            8_000,
            70_000,
            80_000,
            750_000,
            900_000,
        ],
    ]
    param_names = ["dtype", "M"]

    def setup(self, dtype, M):
        """Build the series and the two lookup arrays for one parameter set.

        Parameters
        ----------
        dtype : type
            Element type the series is cast to.
        M : int
            Number of random floats to generate.
        """
        # Fixed seed so every run benchmarks the same data.
        np.random.seed(42)
        self.values = np.random.rand(M)
        self.s = pd.Series(self.values).astype(dtype)
        # Shuffle after the series is built so the lookup order differs from
        # the series order.
        np.random.shuffle(self.values)
        self.values_outside = self.values + 0.1

    def time_isin(self, dtype, M):
        """Time ``isin`` against the shuffled copy of the series values."""
        self.s.isin(self.values)

    def time_isin_outside(self, dtype, M):
        """Time ``isin`` against the values shifted by ``0.1``."""
        self.s.isin(self.values_outside)
56+
57+
58+
class IsinWithArangeSorted:
    """Benchmark ``Series.isin`` where series and values are both ``arange(M)``.

    Both operands hold the integers ``0 .. M - 1`` in ascending order, cast
    to the benchmarked ``dtype``.
    """

    # ``object`` replaces the ``np.object`` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    params = [
        [np.float64, np.int64, np.uint64, object],
        [
            1_000,
            2_000,
            8_000,
            100_000,
            1_000_000,
        ],
    ]
    param_names = ["dtype", "M"]

    def setup(self, dtype, M):
        """Build the sorted series and lookup array for one parameter set.

        Parameters
        ----------
        dtype : type
            Element type both operands are cast to.
        M : int
            Number of elements in the series and in the lookup array.
        """
        self.s = pd.Series(np.arange(M)).astype(dtype)
        self.values = np.arange(M).astype(dtype)

    def time_isin(self, dtype, M):
        """Time ``isin`` of the sorted series against the sorted values."""
        self.s.isin(self.values)
77+
78+
79+
class IsinWithArange:
    """Benchmark ``Series.isin`` of a large random series against ``arange(M)``.

    The series holds ``10 ** 6`` random integers drawn from
    ``[offset, M + offset)`` with ``offset = M * offset_factor``. With a
    factor of ``0`` the draws fall inside ``[0, M)``; with ``-2`` or ``2``
    they are drawn from a range that does not overlap ``[0, M)``.
    """

    # ``object`` replaces the ``np.object`` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    params = [
        [np.float64, np.int64, np.uint64, object],
        [
            1_000,
            2_000,
            8_000,
        ],
        [-2, 0, 2],
    ]
    param_names = ["dtype", "M", "offset_factor"]

    def setup(self, dtype, M, offset_factor):
        """Build the random series and the ``arange`` lookup array.

        Parameters
        ----------
        dtype : type
            Element type both operands are cast to.
        M : int
            Number of lookup values and width of the random-draw interval.
        offset_factor : int
            Multiple of ``M`` by which the random-draw interval is shifted.
        """
        offset = int(M * offset_factor)
        # Fixed seed so every run benchmarks the same data.
        np.random.seed(42)
        # NOTE(review): for np.uint64 with a negative offset the cast below is
        # applied to negative integers - confirm this is intended.
        tmp = pd.Series(np.random.randint(offset, M + offset, 10 ** 6))
        self.s = tmp.astype(dtype)
        self.values = np.arange(M).astype(dtype)

    def time_isin(self, dtype, M, offset_factor):
        """Time ``isin`` of the random series against ``arange(M)``."""
        self.s.isin(self.values)
100+
101+
102+
class Float64GroupIndex:
    """Benchmark ``groupby(...).last()`` keyed by a float-valued index.

    The group keys are the frame's datetime index converted to integers,
    divided by ``1e9`` and rounded, so the keys are floats.
    """

    # GH28303
    def setup(self):
        """Build a one-million-row frame and its rounded float group keys."""
        # ``periods`` is an element count, so pass an int rather than the
        # float ``1e6``; non-integer periods are deprecated in recent pandas.
        self.df = pd.date_range(
            start="1/1/2018", end="1/2/2018", periods=10 ** 6
        ).to_frame()
        # Presumably converts epoch nanoseconds to whole seconds - verify
        # against the index resolution in use.
        self.group_index = np.round(self.df.index.astype(int) / 1e9)

    def time_groupby(self):
        """Time grouping the frame by the float keys and taking the last row."""
        self.df.groupby(self.group_index).last()
112+
113+
114+
class UniqueAndFactorizeArange:
    """Benchmark ``pd.factorize`` and ``pd.unique`` on shifted float ranges.

    The input is ``arange(10 ** 4)`` as float64, shifted by
    ``10 ** exponent``, with every element repeated 100 times in a row.
    """

    params = range(4, 16)
    param_names = ["exponent"]

    def setup(self, exponent):
        """Build the repeated, shifted float array for one ``exponent``."""
        base = np.arange(10 ** 4, dtype="float64")
        shifted = base + 10 ** exponent
        self.a2 = shifted.repeat(100)

    def time_factorize(self, exponent):
        """Time ``pd.factorize`` on the prepared array."""
        pd.factorize(self.a2)

    def time_unique(self, exponent):
        """Time ``pd.unique`` on the prepared array."""
        pd.unique(self.a2)
127+
128+
129+
class NumericSeriesIndexing:
    """Benchmark ``Series.loc`` slicing on a sorted numeric index with a duplicate.

    The index holds ``0 .. N - 2`` in ascending order with the label ``54``
    appearing twice, so it is monotonic but not unique.
    """

    # Parametrized by NumPy dtype: the dedicated ``pd.Int64Index``,
    # ``pd.UInt64Index`` and ``pd.Float64Index`` classes were removed in
    # pandas 2.0, and ``pd.Index`` with these dtypes builds the equivalent
    # index on older versions as well.
    params = [
        (np.int64, np.uint64, np.float64),
        (10 ** 4, 10 ** 5, 5 * 10 ** 5, 10 ** 6, 5 * 10 ** 6),
    ]
    param_names = ["index_dtype", "N"]

    def setup(self, index, N):
        """Build a length-``N`` series over the duplicated, sorted index.

        Parameters
        ----------
        index : type
            NumPy dtype of the index labels (the ``index_dtype`` parameter).
        N : int
            Number of elements in the series.
        """
        # 55 + 1 + (N - 56) labels: exactly N entries, with 54 repeated.
        vals = np.array(
            list(range(55)) + [54] + list(range(55, N - 1)), dtype=index
        )
        indices = pd.Index(vals)
        self.data = pd.Series(np.arange(N), index=indices)

    def time_loc_slice(self, index, N):
        """Time a label slice up to ``800``."""
        # trigger building of mapping
        self.data.loc[:800]
145+
146+
147+
class NumericSeriesIndexingShuffled:
    """Benchmark ``Series.loc`` slicing on a shuffled numeric index with a duplicate.

    The index holds the labels ``0 .. N - 2`` with ``54`` appearing twice,
    shuffled with a fixed seed.
    """

    # Parametrized by NumPy dtype: the dedicated ``pd.Int64Index``,
    # ``pd.UInt64Index`` and ``pd.Float64Index`` classes were removed in
    # pandas 2.0, and ``pd.Index`` with these dtypes builds the equivalent
    # index on older versions as well.
    params = [
        (np.int64, np.uint64, np.float64),
        (10 ** 4, 10 ** 5, 5 * 10 ** 5, 10 ** 6, 5 * 10 ** 6),
    ]
    param_names = ["index_dtype", "N"]

    def setup(self, index, N):
        """Build a length-``N`` series over the duplicated, shuffled index.

        Parameters
        ----------
        index : type
            NumPy dtype of the index labels (the ``index_dtype`` parameter).
        N : int
            Number of elements in the series.
        """
        # 55 + 1 + (N - 56) labels: exactly N entries, with 54 repeated.
        vals = np.array(
            list(range(55)) + [54] + list(range(55, N - 1)), dtype=index
        )
        # Fixed seed so every run benchmarks the same label order.
        np.random.seed(42)
        np.random.shuffle(vals)
        indices = pd.Index(vals)
        self.data = pd.Series(np.arange(N), index=indices)

    def time_loc_slice(self, index, N):
        """Time a label slice up to ``800``."""
        # trigger building of mapping
        self.data.loc[:800]

0 commit comments

Comments
 (0)