@@ -183,62 +183,6 @@ def time_left_outer_join_index(self):
183
183
self .left .join (self .right , on = 'jim' )
184
184
185
185
186
class merge_asof_noby(object):
    """Timing case for ``merge_asof`` joined on the ``time`` column only."""

    # Row counts of the left and right frames. Kept as class attributes so
    # they can be overridden (for example shrunk) without editing ``setup``.
    one_count = 200000
    two_count = 1000000

    def setup(self):
        """Build two frames from a fixed random seed, each sorted by ``time``."""
        np.random.seed(0)
        one_count = self.one_count
        two_count = self.two_count

        # Floor division keeps the upper bound passed to ``randint`` an
        # integer under Python 3, where ``/`` is true division.
        self.df1 = pd.DataFrame(
            {'time': np.random.randint(0, one_count // 20, one_count),
             'value1': np.random.randn(one_count)})
        self.df2 = pd.DataFrame(
            {'time': np.random.randint(0, two_count // 20, two_count),
             'value2': np.random.randn(two_count)})

        self.df1 = self.df1.sort_values('time')
        self.df2 = self.df2.sort_values('time')

    def time_merge_asof_noby(self):
        """Run ``merge_asof`` of ``df1`` against ``df2`` on ``time``."""
        merge_asof(self.df1, self.df2, on='time')
201
-
202
-
203
class merge_asof_by_object(object):
    """Timing case for ``merge_asof`` on ``time`` grouped by a string ``key``."""

    # Row counts of the left and right frames. Kept as class attributes so
    # they can be overridden (for example shrunk) without editing ``setup``.
    one_count = 200000
    two_count = 1000000

    def setup(self):
        """Build two frames from a fixed random seed, each sorted by ``time``."""
        import string

        np.random.seed(0)
        one_count = self.one_count
        two_count = self.two_count
        # ``string.uppercase`` only exists on Python 2; ``ascii_uppercase``
        # is available on both Python 2 and Python 3.
        letters = list(string.ascii_uppercase)

        # Floor division keeps the upper bound passed to ``randint`` an
        # integer under Python 3, where ``/`` is true division.
        self.df1 = pd.DataFrame(
            {'time': np.random.randint(0, one_count // 20, one_count),
             'key': np.random.choice(letters, one_count),
             'value1': np.random.randn(one_count)})
        self.df2 = pd.DataFrame(
            {'time': np.random.randint(0, two_count // 20, two_count),
             'key': np.random.choice(letters, two_count),
             'value2': np.random.randn(two_count)})

        self.df1 = self.df1.sort_values('time')
        self.df2 = self.df2.sort_values('time')

    def time_merge_asof_by_object(self):
        """Run ``merge_asof`` on ``time``, matching rows within each ``key``."""
        merge_asof(self.df1, self.df2, on='time', by='key')
221
-
222
-
223
class merge_asof_by_int(object):
    """Timing case for ``merge_asof`` on ``time`` grouped by an integer ``key``."""

    # Row counts of the left and right frames. Kept as class attributes so
    # they can be overridden (for example shrunk) without editing ``setup``.
    one_count = 200000
    two_count = 1000000

    def setup(self):
        """Build two frames from a fixed random seed, each sorted by ``time``."""
        np.random.seed(0)
        one_count = self.one_count
        two_count = self.two_count

        # Floor division keeps the upper bound passed to ``randint`` an
        # integer under Python 3, where ``/`` is true division.
        self.df1 = pd.DataFrame(
            {'time': np.random.randint(0, one_count // 20, one_count),
             'key': np.random.randint(0, 25, one_count),
             'value1': np.random.randn(one_count)})
        self.df2 = pd.DataFrame(
            {'time': np.random.randint(0, two_count // 20, two_count),
             'key': np.random.randint(0, 25, two_count),
             'value2': np.random.randn(two_count)})

        self.df1 = self.df1.sort_values('time')
        self.df2 = self.df2.sort_values('time')

    def time_merge_asof_by_int(self):
        """Run ``merge_asof`` on ``time``, matching rows within each ``key``."""
        merge_asof(self.df1, self.df2, on='time', by='key')
240
-
241
-
242
186
class join_non_unique_equal (object ):
243
187
# outer join of non-unique
244
188
# GH 6329
@@ -333,6 +277,48 @@ def time_merge_ordered(self):
333
277
merge_ordered (self .left , self .right , on = 'key' , left_by = 'group' )
334
278
335
279
280
+ # ----------------------------------------------------------------------
281
+ # asof merge
282
+
283
class MergeAsof(object):
    """Timing cases for ``merge_asof``: no ``by``, string ``by``, integer ``by``.

    ``setup`` builds one pair of source frames and slices out a narrow
    column subset per timing case, so each case merges only the columns it
    needs.
    """

    # Row counts of the left and right frames. Kept as class attributes so
    # they can be overridden (for example shrunk) without editing ``setup``.
    one_count = 200000
    two_count = 1000000

    def setup(self):
        """Build the source frames from a fixed seed and the per-case views."""
        import string

        np.random.seed(0)
        one_count = self.one_count
        two_count = self.two_count
        # ``string.uppercase`` only exists on Python 2; ``ascii_uppercase``
        # is available on both Python 2 and Python 3.
        letters = list(string.ascii_uppercase)

        # Floor division keeps the upper bound passed to ``randint`` an
        # integer under Python 3, where ``/`` is true division.
        self.df1 = pd.DataFrame(
            {'time': np.random.randint(0, one_count // 20, one_count),
             'key': np.random.choice(letters, one_count),
             'key2': np.random.randint(0, 25, one_count),
             'value1': np.random.randn(one_count)})
        self.df2 = pd.DataFrame(
            {'time': np.random.randint(0, two_count // 20, two_count),
             'key': np.random.choice(letters, two_count),
             'key2': np.random.randint(0, 25, two_count),
             'value2': np.random.randn(two_count)})

        self.df1 = self.df1.sort_values('time')
        self.df2 = self.df2.sort_values('time')

        # "a": no grouping key; "b": string key; "c": integer key.
        self.df1a = self.df1[['time', 'value1']]
        self.df2a = self.df2[['time', 'value2']]
        self.df1b = self.df1[['time', 'key', 'value1']]
        self.df2b = self.df2[['time', 'key', 'value2']]
        self.df1c = self.df1[['time', 'key2', 'value1']]
        self.df2c = self.df2[['time', 'key2', 'value2']]

    def time_noby(self):
        """Run ``merge_asof`` on ``time`` with no ``by`` column."""
        merge_asof(self.df1a, self.df2a, on='time')

    def time_by_object(self):
        """Run ``merge_asof`` on ``time`` grouped by the string ``key``."""
        merge_asof(self.df1b, self.df2b, on='time', by='key')

    def time_by_int(self):
        """Run ``merge_asof`` on ``time`` grouped by the integer ``key2``."""
        merge_asof(self.df1c, self.df2c, on='time', by='key2')
320
+
321
+
336
322
#----------------------------------------------------------------------
337
323
# data alignment
338
324
0 commit comments