-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN: ASV inference benchmark #18759
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CLN: ASV inference benchmark #18759
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,117 +1,114 @@ | ||
from .pandas_vb_common import * | ||
import pandas as pd | ||
import numpy as np | ||
import pandas.util.testing as tm | ||
import pandas._libs.lib as lib | ||
from pandas import DataFrame, Series, to_numeric | ||
|
||
from .pandas_vb_common import setup # noqa | ||
|
||
class DtypeInfer(object): | ||
goal_time = 0.2 | ||
|
||
class NumericInferOps(object): | ||
# from GH 7332 | ||
goal_time = 0.2 | ||
params = ['int64', 'int32', 'uint32', 'float32', 'float64'] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add uint64. You can add int16, int8, uint16 and uint8 as well to cover the bases. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We may want to define these numeric dtypes elsewhere and import them here (so we are consistent across the asv benchmarks), and probably do the same for the datetimelike dtypes as well. |
||
param_names = ['dtype'] | ||
|
||
def setup(self, dtype): | ||
N = 5 * 10**5 | ||
self.df = DataFrame({'A': np.arange(N).astype(dtype), | ||
'B': np.arange(N).astype(dtype)}) | ||
|
||
def setup(self): | ||
self.N = 500000 | ||
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), | ||
B=np.arange(self.N, dtype='int64'))) | ||
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), | ||
B=np.arange(self.N, dtype='int32'))) | ||
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), | ||
B=np.arange(self.N, dtype='uint32'))) | ||
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), | ||
B=np.arange(self.N, dtype='float64'))) | ||
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), | ||
B=np.arange(self.N, dtype='float32'))) | ||
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), | ||
B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'))) | ||
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), | ||
B=self.df_datetime64['B'])) | ||
def time_add(self, dtype): | ||
self.df['A'] + self.df['B'] | ||
|
||
def time_int64(self): | ||
(self.df_int64['A'] + self.df_int64['B']) | ||
def time_subtract(self, dtype): | ||
self.df['A'] - self.df['B'] | ||
|
||
def time_int32(self): | ||
(self.df_int32['A'] + self.df_int32['B']) | ||
def time_multiply(self, dtype): | ||
self.df['A'] * self.df['B'] | ||
|
||
def time_uint32(self): | ||
(self.df_uint32['A'] + self.df_uint32['B']) | ||
def time_divide(self, dtype): | ||
self.df['A'] / self.df['B'] | ||
|
||
def time_float64(self): | ||
(self.df_float64['A'] + self.df_float64['B']) | ||
def time_modulo(self, dtype): | ||
self.df['A'] % self.df['B'] | ||
|
||
def time_float32(self): | ||
(self.df_float32['A'] + self.df_float32['B']) | ||
|
||
def time_datetime64(self): | ||
(self.df_datetime64['A'] - self.df_datetime64['B']) | ||
class DateInferOps(object): | ||
# from GH 7332 | ||
goal_time = 0.2 | ||
|
||
def setup_cache(self): | ||
N = 5 * 10**5 | ||
df = DataFrame({'datetime64': np.arange(N).astype('datetime64[ms]')}) | ||
df['timedelta'] = df['datetime64'] - df['datetime64'] | ||
return df | ||
|
||
def time_timedelta64_1(self): | ||
(self.df_timedelta64['A'] + self.df_timedelta64['B']) | ||
def time_subtract_datetimes(self, df): | ||
df['datetime64'] - df['datetime64'] | ||
|
||
def time_timedelta64_2(self): | ||
(self.df_timedelta64['A'] + self.df_timedelta64['A']) | ||
def time_timedelta_plus_datetime(self, df): | ||
df['timedelta'] + df['datetime64'] | ||
|
||
def time_add_timedeltas(self, df): | ||
df['timedelta'] + df['timedelta'] | ||
|
||
|
||
class ToNumeric(object): | ||
|
||
class to_numeric(object): | ||
goal_time = 0.2 | ||
params = ['ignore', 'coerce'] | ||
param_names = ['errors'] | ||
|
||
def setup(self): | ||
self.n = 10000 | ||
self.float = Series(np.random.randn(self.n * 100)) | ||
def setup(self, errors): | ||
N = 10000 | ||
self.float = Series(np.random.randn(N)) | ||
self.numstr = self.float.astype('str') | ||
self.str = Series(tm.makeStringIndex(self.n)) | ||
|
||
def time_from_float(self): | ||
pd.to_numeric(self.float) | ||
self.str = Series(tm.makeStringIndex(N)) | ||
|
||
def time_from_numeric_str(self): | ||
pd.to_numeric(self.numstr) | ||
def time_from_float(self, errors): | ||
to_numeric(self.float, errors=errors) | ||
|
||
def time_from_str_ignore(self): | ||
pd.to_numeric(self.str, errors='ignore') | ||
def time_from_numeric_str(self, errors): | ||
to_numeric(self.numstr, errors=errors) | ||
|
||
def time_from_str_coerce(self): | ||
pd.to_numeric(self.str, errors='coerce') | ||
def time_from_str(self, errors): | ||
to_numeric(self.str, errors=errors) | ||
|
||
|
||
class to_numeric_downcast(object): | ||
class ToNumericDowncast(object): | ||
|
||
param_names = ['dtype', 'downcast'] | ||
params = [['string-float', 'string-int', 'string-nint', 'datetime64', | ||
'int-list', 'int32'], | ||
[None, 'integer', 'signed', 'unsigned', 'float']] | ||
|
||
N = 500000 | ||
N2 = int(N / 2) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The `int(...)` was there for a good reason: you can't multiply a list by a float (I find it a bit strange that this didn't fail for you). |
||
|
||
data_dict = { | ||
'string-int': (['1'] * N2) + ([2] * N2), | ||
'string-nint': (['-1'] * N2) + ([2] * N2), | ||
'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'], | ||
dtype='datetime64[D]'), N), | ||
'string-float': (['1.1'] * N2) + ([2] * N2), | ||
'int-list': ([1] * N2) + ([2] * N2), | ||
'int32': np.repeat(np.int32(1), N) | ||
} | ||
N2 = N / 2 | ||
|
||
data_dict = {'string-int': ['1'] * N2 + [2] * N2, | ||
'string-nint': ['-1'] * N2 + [2] * N2, | ||
'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'], | ||
dtype='datetime64[D]'), N), | ||
'string-float': ['1.1'] * N2 + [2] * N2, | ||
'int-list': [1] * N2 + [2] * N2, | ||
'int32': np.repeat(np.int32(1), N)} | ||
|
||
def setup(self, dtype, downcast): | ||
self.data = self.data_dict[dtype] | ||
|
||
def time_downcast(self, dtype, downcast): | ||
pd.to_numeric(self.data, downcast=downcast) | ||
to_numeric(self.data, downcast=downcast) | ||
|
||
|
||
class MaybeConvertNumeric(object): | ||
|
||
def setup(self): | ||
n = 1000000 | ||
arr = np.repeat([2**63], n) | ||
arr = arr + np.arange(n).astype('uint64') | ||
arr = np.array([arr[i] if i%2 == 0 else | ||
str(arr[i]) for i in range(n)], | ||
dtype=object) | ||
|
||
arr[-1] = -1 | ||
self.data = arr | ||
self.na_values = set() | ||
|
||
def time_convert(self): | ||
lib.maybe_convert_numeric(self.data, self.na_values, | ||
coerce_numeric=False) | ||
def setup_cache(self): | ||
N = 10**6 | ||
arr = np.repeat([2**63], N) + np.arange(N).astype('uint64') | ||
data = arr.astype(object) | ||
data[1::2] = arr[1::2].astype(str) | ||
data[-1] = -1 | ||
return data | ||
|
||
def time_convert(self, data): | ||
lib.maybe_convert_numeric(data, set(), coerce_numeric=False) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this is not backward compatible with versions before 0.20, but I think that is OK for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you recall what this import would be pre 0.20?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
from pandas import lib
(this is in fact what `pandas/lib.py`
does now, but that will get blown away in 0.22)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can import `lib` from `pandas_vb_common.py`; the backward compatibility is handled there.