Skip to content

Commit 7a0ee19

Browse files
mroeschke and jreback
authored and committed
CLN: ASV inference benchmark (pandas-dev#18759)
1 parent fb178fc commit 7a0ee19

File tree

2 files changed

+77
-77
lines changed

2 files changed

+77
-77
lines changed

asv_bench/benchmarks/inference.py

+72-76
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,80 @@
1-
from .pandas_vb_common import *
2-
import pandas as pd
1+
import numpy as np
2+
import pandas.util.testing as tm
3+
from pandas import DataFrame, Series, to_numeric
34

5+
from .pandas_vb_common import numeric_dtypes, lib, setup # noqa
46

5-
class DtypeInfer(object):
6-
goal_time = 0.2
77

8+
class NumericInferOps(object):
89
# from GH 7332
10+
goal_time = 0.2
11+
params = numeric_dtypes
12+
param_names = ['dtype']
13+
14+
def setup(self, dtype):
15+
N = 5 * 10**5
16+
self.df = DataFrame({'A': np.arange(N).astype(dtype),
17+
'B': np.arange(N).astype(dtype)})
18+
19+
def time_add(self, dtype):
20+
self.df['A'] + self.df['B']
21+
22+
def time_subtract(self, dtype):
23+
self.df['A'] - self.df['B']
924

10-
def setup(self):
11-
self.N = 500000
12-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'),
13-
B=np.arange(self.N, dtype='int64')))
14-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'),
15-
B=np.arange(self.N, dtype='int32')))
16-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'),
17-
B=np.arange(self.N, dtype='uint32')))
18-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'),
19-
B=np.arange(self.N, dtype='float64')))
20-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'),
21-
B=np.arange(self.N, dtype='float32')))
22-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'),
23-
B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
24-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']),
25-
B=self.df_datetime64['B']))
25+
def time_multiply(self, dtype):
26+
self.df['A'] * self.df['B']
2627

27-
def time_int64(self):
28-
(self.df_int64['A'] + self.df_int64['B'])
28+
def time_divide(self, dtype):
29+
self.df['A'] / self.df['B']
2930

30-
def time_int32(self):
31-
(self.df_int32['A'] + self.df_int32['B'])
31+
def time_modulo(self, dtype):
32+
self.df['A'] % self.df['B']
3233

33-
def time_uint32(self):
34-
(self.df_uint32['A'] + self.df_uint32['B'])
3534

36-
def time_float64(self):
37-
(self.df_float64['A'] + self.df_float64['B'])
35+
class DateInferOps(object):
36+
# from GH 7332
37+
goal_time = 0.2
38+
39+
def setup_cache(self):
40+
N = 5 * 10**5
41+
df = DataFrame({'datetime64': np.arange(N).astype('datetime64[ms]')})
42+
df['timedelta'] = df['datetime64'] - df['datetime64']
43+
return df
3844

39-
def time_float32(self):
40-
(self.df_float32['A'] + self.df_float32['B'])
45+
def time_subtract_datetimes(self, df):
46+
df['datetime64'] - df['datetime64']
4147

42-
def time_datetime64(self):
43-
(self.df_datetime64['A'] - self.df_datetime64['B'])
48+
def time_timedelta_plus_datetime(self, df):
49+
df['timedelta'] + df['datetime64']
4450

45-
def time_timedelta64_1(self):
46-
(self.df_timedelta64['A'] + self.df_timedelta64['B'])
51+
def time_add_timedeltas(self, df):
52+
df['timedelta'] + df['timedelta']
4753

48-
def time_timedelta64_2(self):
49-
(self.df_timedelta64['A'] + self.df_timedelta64['A'])
5054

55+
class ToNumeric(object):
5156

52-
class to_numeric(object):
5357
goal_time = 0.2
58+
params = ['ignore', 'coerce']
59+
param_names = ['errors']
5460

55-
def setup(self):
56-
self.n = 10000
57-
self.float = Series(np.random.randn(self.n * 100))
61+
def setup(self, errors):
62+
N = 10000
63+
self.float = Series(np.random.randn(N))
5864
self.numstr = self.float.astype('str')
59-
self.str = Series(tm.makeStringIndex(self.n))
65+
self.str = Series(tm.makeStringIndex(N))
6066

61-
def time_from_float(self):
62-
pd.to_numeric(self.float)
67+
def time_from_float(self, errors):
68+
to_numeric(self.float, errors=errors)
6369

64-
def time_from_numeric_str(self):
65-
pd.to_numeric(self.numstr)
70+
def time_from_numeric_str(self, errors):
71+
to_numeric(self.numstr, errors=errors)
6672

67-
def time_from_str_ignore(self):
68-
pd.to_numeric(self.str, errors='ignore')
73+
def time_from_str(self, errors):
74+
to_numeric(self.str, errors=errors)
6975

70-
def time_from_str_coerce(self):
71-
pd.to_numeric(self.str, errors='coerce')
7276

73-
74-
class to_numeric_downcast(object):
77+
class ToNumericDowncast(object):
7578

7679
param_names = ['dtype', 'downcast']
7780
params = [['string-float', 'string-int', 'string-nint', 'datetime64',
@@ -81,37 +84,30 @@ class to_numeric_downcast(object):
8184
N = 500000
8285
N2 = int(N / 2)
8386

84-
data_dict = {
85-
'string-int': (['1'] * N2) + ([2] * N2),
86-
'string-nint': (['-1'] * N2) + ([2] * N2),
87-
'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'],
88-
dtype='datetime64[D]'), N),
89-
'string-float': (['1.1'] * N2) + ([2] * N2),
90-
'int-list': ([1] * N2) + ([2] * N2),
91-
'int32': np.repeat(np.int32(1), N)
92-
}
87+
data_dict = {'string-int': ['1'] * N2 + [2] * N2,
88+
'string-nint': ['-1'] * N2 + [2] * N2,
89+
'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'],
90+
dtype='datetime64[D]'), N),
91+
'string-float': ['1.1'] * N2 + [2] * N2,
92+
'int-list': [1] * N2 + [2] * N2,
93+
'int32': np.repeat(np.int32(1), N)}
9394

9495
def setup(self, dtype, downcast):
9596
self.data = self.data_dict[dtype]
9697

9798
def time_downcast(self, dtype, downcast):
98-
pd.to_numeric(self.data, downcast=downcast)
99+
to_numeric(self.data, downcast=downcast)
99100

100101

101102
class MaybeConvertNumeric(object):
102103

103-
def setup(self):
104-
n = 1000000
105-
arr = np.repeat([2**63], n)
106-
arr = arr + np.arange(n).astype('uint64')
107-
arr = np.array([arr[i] if i%2 == 0 else
108-
str(arr[i]) for i in range(n)],
109-
dtype=object)
110-
111-
arr[-1] = -1
112-
self.data = arr
113-
self.na_values = set()
114-
115-
def time_convert(self):
116-
lib.maybe_convert_numeric(self.data, self.na_values,
117-
coerce_numeric=False)
104+
def setup_cache(self):
105+
N = 10**6
106+
arr = np.repeat([2**63], N) + np.arange(N).astype('uint64')
107+
data = arr.astype(object)
108+
data[1::2] = arr[1::2].astype(str)
109+
data[-1] = -1
110+
return data
111+
112+
def time_convert(self, data):
113+
lib.maybe_convert_numeric(data, set(), coerce_numeric=False)

asv_bench/benchmarks/pandas_vb_common.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
except ImportError:
1515
pass
1616

17+
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
18+
np.float64, np.int16, np.int8, np.uint16, np.uint8]
19+
datetime_dtypes = [np.datetime64, np.timedelta64]
20+
1721
# This function just needs to be imported into each benchmark file in order to
1822
# set up the random seed before each function.
1923
# http://asv.readthedocs.io/en/latest/writing_benchmarks.html
@@ -39,7 +43,7 @@ def remove(self, f):
3943
def teardown(self, *args, **kwargs):
4044
self.remove(self.fname)
4145

42-
# try em until it works!
46+
# Compatibility import for lib
4347
for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']:
4448
try:
4549
lib = import_module(imp)

0 commit comments

Comments
 (0)