Skip to content

Commit a44d8ee

Browse files
kevinanker authored and yehoshuadimarsky committed
ASV: Non-unique DataFrame index (pandas-dev#47551)
1 parent 18511a4 commit a44d8ee

File tree

1 file changed

+22
-8
lines changed

1 file changed

+22
-8
lines changed

asv_bench/benchmarks/indexing.py

+22 -8
Original file line number | Diff line number | Diff line change
@@ -157,25 +157,39 @@ def time_boolean_rows_boolean(self):
157157

158158

159159
class DataFrameNumericIndexing:
160-
def setup(self):
160+
161+
params = [
162+
(Int64Index, UInt64Index, Float64Index),
163+
("unique_monotonic_inc", "nonunique_monotonic_inc"),
164+
]
165+
param_names = ["index_dtype", "index_structure"]
166+
167+
def setup(self, index, index_structure):
168+
N = 10**5
169+
indices = {
170+
"unique_monotonic_inc": index(range(N)),
171+
"nonunique_monotonic_inc": index(
172+
list(range(55)) + [54] + list(range(55, N - 1))
173+
),
174+
}
161175
self.idx_dupe = np.array(range(30)) * 99
162-
self.df = DataFrame(np.random.randn(100000, 5))
176+
self.df = DataFrame(np.random.randn(N, 5), index=indices[index_structure])
163177
self.df_dup = concat([self.df, 2 * self.df, 3 * self.df])
164-
self.bool_indexer = [True] * 50000 + [False] * 50000
178+
self.bool_indexer = [True] * (N // 2) + [False] * (N - N // 2)
165179

166-
def time_iloc_dups(self):
180+
def time_iloc_dups(self, index, index_structure):
167181
self.df_dup.iloc[self.idx_dupe]
168182

169-
def time_loc_dups(self):
183+
def time_loc_dups(self, index, index_structure):
170184
self.df_dup.loc[self.idx_dupe]
171185

172-
def time_iloc(self):
186+
def time_iloc(self, index, index_structure):
173187
self.df.iloc[:100, 0]
174188

175-
def time_loc(self):
189+
def time_loc(self, index, index_structure):
176190
self.df.loc[:100, 0]
177191

178-
def time_bool_indexer(self):
192+
def time_bool_indexer(self, index, index_structure):
179193
self.df[self.bool_indexer]
180194

181195

0 commit comments

Comments
 (0)