Skip to content

Commit 75157fc

Browse files
author
Christopher C. Aycock
committed
merge_asof() has type specializations and can take multiple 'by' parameters (pandas-dev#13936)
1 parent 3552dc0 commit 75157fc

File tree

5 files changed

+1783
-80
lines changed

5 files changed

+1783
-80
lines changed

asv_bench/benchmarks/join_merge.py

+43-2
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,25 @@ def time_merge_asof_noby(self):
310310
merge_asof(self.df1, self.df2, on='time')
311311

312312

313+
class merge_asof_int32_noby(object):
    """Benchmark ``merge_asof`` on an int32 ``time`` column without ``by``."""

    def setup(self):
        """Seed NumPy and build two frames sorted by an int32 ``time`` column."""
        np.random.seed(0)
        left_rows = 200000
        right_rows = 1000000

        # Draw order (time before value, left before right) is kept so the
        # seeded random data is the same on every run.
        left_time = np.random.randint(0, left_rows/20, left_rows)
        left_value = np.random.randn(left_rows)
        left = pd.DataFrame({'time': left_time, 'value1': left_value})
        left['time'] = np.int32(left['time'])

        right_time = np.random.randint(0, right_rows/20, right_rows)
        right_value = np.random.randn(right_rows)
        right = pd.DataFrame({'time': right_time, 'value2': right_value})
        right['time'] = np.int32(right['time'])

        # Both frames are stored sorted on the column used as the asof key.
        self.df1 = left.sort_values('time')
        self.df2 = right.sort_values('time')

    def time_merge_asof_int32_noby(self):
        """Time an asof merge of the two frames on ``time``."""
        merge_asof(self.df1, self.df2, on='time')
330+
331+
313332
class merge_asof_by_object(object):
314333

315334
def setup(self):
@@ -318,10 +337,10 @@ def setup(self):
318337
one_count = 200000
319338
two_count = 1000000
320339
self.df1 = pd.DataFrame({'time': np.random.randint(0, one_count/20, one_count),
321-
'key': np.random.choice(list(string.uppercase), one_count),
340+
'key': np.random.choice(list(string.ascii_uppercase), one_count),
322341
'value1': np.random.randn(one_count)})
323342
self.df2 = pd.DataFrame({'time': np.random.randint(0, two_count/20, two_count),
324-
'key': np.random.choice(list(string.uppercase), two_count),
343+
'key': np.random.choice(list(string.ascii_uppercase), two_count),
325344
'value2': np.random.randn(two_count)})
326345
self.df1 = self.df1.sort_values('time')
327346
self.df2 = self.df2.sort_values('time')
@@ -349,6 +368,28 @@ def time_merge_asof_by_int(self):
349368
merge_asof(self.df1, self.df2, on='time', by='key')
350369

351370

371+
class merge_asof_multiby(object):
    """Benchmark ``merge_asof`` with two ``by`` columns (``key1``, ``key2``)."""

    def setup(self):
        """Seed NumPy and build two time-sorted frames with two letter keys."""
        import string

        np.random.seed(0)
        letters = list(string.ascii_uppercase)
        left_rows = 200000
        right_rows = 1000000

        # Dict entries are evaluated in order (time, key1, key2, value), so
        # the sequence of random draws is the same on every seeded run.
        left = pd.DataFrame({
            'time': np.random.randint(0, left_rows/20, left_rows),
            'key1': np.random.choice(letters, left_rows),
            'key2': np.random.choice(letters, left_rows),
            'value1': np.random.randn(left_rows),
        })
        right = pd.DataFrame({
            'time': np.random.randint(0, right_rows/20, right_rows),
            'key1': np.random.choice(letters, right_rows),
            'key2': np.random.choice(letters, right_rows),
            'value2': np.random.randn(right_rows),
        })

        # Both frames are stored sorted on the column used as the asof key.
        self.df1 = left.sort_values('time')
        self.df2 = right.sort_values('time')

    def time_merge_asof_multiby(self):
        """Time an asof merge on ``time`` using both key columns as ``by``."""
        merge_asof(self.df1, self.df2, on='time', by=['key1', 'key2'])
391+
392+
352393
class join_non_unique_equal(object):
353394
goal_time = 0.2
354395

0 commit comments

Comments
 (0)