@@ -310,6 +310,25 @@ def time_merge_asof_noby(self):
310
310
merge_asof (self .df1 , self .df2 , on = 'time' )
311
311
312
312
313
class merge_asof_int32_noby(object):
    """Benchmark ``merge_asof`` on an int32 ``time`` key without ``by`` columns."""

    def setup(self):
        """Build the two input frames, cast ``time`` to int32 and sort on it."""
        np.random.seed(0)  # fixed seed so every run builds the same data
        one_count = 200000
        two_count = 1000000

        # Floor division keeps the upper bound an integer; ``/`` would hand
        # randint a float on Python 3.
        self.df1 = pd.DataFrame(
            {'time': np.random.randint(0, one_count // 20, one_count),
             'value1': np.random.randn(one_count)})
        # Cast explicitly: this benchmark targets the int32 key path.
        self.df1['time'] = self.df1['time'].astype(np.int32)

        self.df2 = pd.DataFrame(
            {'time': np.random.randint(0, two_count // 20, two_count),
             'value2': np.random.randn(two_count)})
        self.df2['time'] = self.df2['time'].astype(np.int32)

        # Both frames are sorted on the ``on`` key before the merge.
        self.df1 = self.df1.sort_values('time')
        self.df2 = self.df2.sort_values('time')

    def time_merge_asof_int32_noby(self):
        """Run the as-of merge of ``df1`` with ``df2`` on ``time``."""
        merge_asof(self.df1, self.df2, on='time')
330
+
331
+
313
332
class merge_asof_by_object (object ):
314
333
315
334
def setup (self ):
@@ -318,10 +337,10 @@ def setup(self):
318
337
one_count = 200000
319
338
two_count = 1000000
320
339
self .df1 = pd .DataFrame ({'time' : np .random .randint (0 , one_count / 20 , one_count ),
321
- 'key' : np .random .choice (list (string .uppercase ), one_count ),
340
+ 'key' : np .random .choice (list (string .ascii_uppercase ), one_count ),
322
341
'value1' : np .random .randn (one_count )})
323
342
self .df2 = pd .DataFrame ({'time' : np .random .randint (0 , two_count / 20 , two_count ),
324
- 'key' : np .random .choice (list (string .uppercase ), two_count ),
343
+ 'key' : np .random .choice (list (string .ascii_uppercase ), two_count ),
325
344
'value2' : np .random .randn (two_count )})
326
345
self .df1 = self .df1 .sort_values ('time' )
327
346
self .df2 = self .df2 .sort_values ('time' )
@@ -349,6 +368,28 @@ def time_merge_asof_by_int(self):
349
368
merge_asof (self .df1 , self .df2 , on = 'time' , by = 'key' )
350
369
351
370
371
class merge_asof_multiby(object):
    """Benchmark ``merge_asof`` on ``time`` grouped by two key columns."""

    def setup(self):
        """Build two frames with two letter-valued keys each, sorted on ``time``."""
        import string
        np.random.seed(0)  # fixed seed so every run builds the same data
        one_count = 200000
        two_count = 1000000
        # Build the pool of key values once instead of once per column.
        letters = list(string.ascii_uppercase)

        # Floor division keeps the upper bound an integer; ``/`` would hand
        # randint a float on Python 3.
        self.df1 = pd.DataFrame(
            {'time': np.random.randint(0, one_count // 20, one_count),
             'key1': np.random.choice(letters, one_count),
             'key2': np.random.choice(letters, one_count),
             'value1': np.random.randn(one_count)})
        self.df2 = pd.DataFrame(
            {'time': np.random.randint(0, two_count // 20, two_count),
             'key1': np.random.choice(letters, two_count),
             'key2': np.random.choice(letters, two_count),
             'value2': np.random.randn(two_count)})

        # Both frames are sorted on the ``on`` key before the merge.
        self.df1 = self.df1.sort_values('time')
        self.df2 = self.df2.sort_values('time')

    def time_merge_asof_multiby(self):
        """Run the as-of merge on ``time`` matching on ``key1`` and ``key2``."""
        merge_asof(self.df1, self.df2, on='time', by=['key1', 'key2'])
391
+
392
+
352
393
class join_non_unique_equal (object ):
353
394
goal_time = 0.2
354
395
0 commit comments