-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN: ASV stat_ops #19049
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CLN: ASV stat_ops #19049
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,205 +1,115 @@ | ||
from .pandas_vb_common import * | ||
import numpy as np | ||
import pandas as pd | ||
|
||
|
||
def _set_use_bottleneck_False(): | ||
try: | ||
pd.options.compute.use_bottleneck = False | ||
except: | ||
from pandas.core import nanops | ||
nanops._USE_BOTTLENECK = False | ||
from .pandas_vb_common import setup # noqa | ||
|
||
|
||
class FrameOps(object): | ||
goal_time = 0.2 | ||
|
||
goal_time = 0.2 | ||
param_names = ['op', 'use_bottleneck', 'dtype', 'axis'] | ||
params = [['mean', 'sum', 'median'], | ||
params = [['mean', 'sum', 'median', 'std'], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also use bottleneck here, and expand these to all of the stat ops (min, max, var, kurt, etc.). |
||
[True, False], | ||
['float', 'int'], | ||
[0, 1]] | ||
|
||
def setup(self, op, use_bottleneck, dtype, axis): | ||
if dtype == 'float': | ||
self.df = DataFrame(np.random.randn(100000, 4)) | ||
elif dtype == 'int': | ||
self.df = DataFrame(np.random.randint(1000, size=(100000, 4))) | ||
|
||
if not use_bottleneck: | ||
_set_use_bottleneck_False() | ||
|
||
self.func = getattr(self.df, op) | ||
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype) | ||
try: | ||
pd.options.compute.use_bottleneck = use_bottleneck | ||
except: | ||
from pandas.core import nanops | ||
nanops._USE_BOTTLENECK = use_bottleneck | ||
self.df_func = getattr(df, op) | ||
|
||
def time_op(self, op, use_bottleneck, dtype, axis): | ||
self.func(axis=axis) | ||
|
||
|
||
class stat_ops_level_frame_sum(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) | ||
random.shuffle(self.index.values) | ||
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) | ||
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) | ||
|
||
def time_stat_ops_level_frame_sum(self): | ||
self.df.sum(level=1) | ||
|
||
|
||
class stat_ops_level_frame_sum_multiple(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) | ||
random.shuffle(self.index.values) | ||
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) | ||
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) | ||
|
||
def time_stat_ops_level_frame_sum_multiple(self): | ||
self.df.sum(level=[0, 1]) | ||
|
||
|
||
class stat_ops_level_series_sum(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) | ||
random.shuffle(self.index.values) | ||
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) | ||
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) | ||
|
||
def time_stat_ops_level_series_sum(self): | ||
self.df[1].sum(level=1) | ||
|
||
|
||
class stat_ops_level_series_sum_multiple(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) | ||
random.shuffle(self.index.values) | ||
self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) | ||
self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) | ||
self.df_func(axis=axis) | ||
|
||
def time_stat_ops_level_series_sum_multiple(self): | ||
self.df[1].sum(level=[0, 1]) | ||
|
||
class FrameMultiIndexOps(object): | ||
|
||
class stat_ops_series_std(object): | ||
goal_time = 0.2 | ||
params = ([0, 1, [0, 1]], ['mean', 'sum', 'median']) | ||
param_names = ['level', 'op'] | ||
|
||
def setup(self): | ||
self.s = Series(np.random.randn(100000), index=np.arange(100000)) | ||
self.s[::2] = np.nan | ||
def setup(self, level, op): | ||
levels = [np.arange(10), np.arange(100), np.arange(100)] | ||
labels = [np.arange(10).repeat(10000), | ||
np.tile(np.arange(100).repeat(100), 10), | ||
np.tile(np.tile(np.arange(100), 100), 10)] | ||
index = pd.MultiIndex(levels=levels, labels=labels) | ||
df = pd.DataFrame(np.random.randn(len(index), 4), index=index) | ||
self.df_func = getattr(df, op) | ||
|
||
def time_stat_ops_series_std(self): | ||
self.s.std() | ||
def time_op(self, level, op): | ||
self.df_func(level=level) | ||
|
||
|
||
class stats_corr_spearman(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(1000, 30)) | ||
|
||
def time_stats_corr_spearman(self): | ||
self.df.corr(method='spearman') | ||
class SeriesOps(object): | ||
|
||
|
||
class stats_rank2d_axis0_average(object): | ||
goal_time = 0.2 | ||
param_names = ['op', 'use_bottleneck', 'dtype'] | ||
params = [['mean', 'sum', 'median', 'std'], | ||
[True, False], | ||
['float', 'int']] | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(5000, 50)) | ||
|
||
def time_stats_rank2d_axis0_average(self): | ||
self.df.rank() | ||
|
||
|
||
class stats_rank2d_axis1_average(object): | ||
goal_time = 0.2 | ||
def setup(self, op, use_bottleneck, dtype): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You could put the bottleneck tests in a separate function / base class, I think. |
||
s = pd.Series(np.random.randn(100000)).astype(dtype) | ||
try: | ||
pd.options.compute.use_bottleneck = use_bottleneck | ||
except: | ||
from pandas.core import nanops | ||
nanops._USE_BOTTLENECK = use_bottleneck | ||
self.s_func = getattr(s, op) | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(5000, 50)) | ||
def time_op(self, op, use_bottleneck, dtype): | ||
self.s_func() | ||
|
||
def time_stats_rank2d_axis1_average(self): | ||
self.df.rank(1) | ||
|
||
class SeriesMultiIndexOps(object): | ||
|
||
class stats_rank_average(object): | ||
goal_time = 0.2 | ||
params = ([0, 1, [0, 1]], ['mean', 'sum', 'median']) | ||
param_names = ['level', 'op'] | ||
|
||
def setup(self): | ||
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) | ||
self.s = Series(self.values) | ||
|
||
def time_stats_rank_average(self): | ||
self.s.rank() | ||
|
||
|
||
class stats_rank_average_int(object): | ||
goal_time = 0.2 | ||
def setup(self, level, op): | ||
levels = [np.arange(10), np.arange(100), np.arange(100)] | ||
labels = [np.arange(10).repeat(10000), | ||
np.tile(np.arange(100).repeat(100), 10), | ||
np.tile(np.tile(np.arange(100), 100), 10)] | ||
index = pd.MultiIndex(levels=levels, labels=labels) | ||
s = pd.Series(np.random.randn(len(index)), index=index) | ||
self.s_func = getattr(s, op) | ||
|
||
def setup(self): | ||
self.values = np.random.randint(0, 100000, size=200000) | ||
self.s = Series(self.values) | ||
def time_op(self, level, op): | ||
self.s_func(level=level) | ||
|
||
def time_stats_rank_average_int(self): | ||
self.s.rank() | ||
|
||
class Rank(object): | ||
|
||
class stats_rank_pct_average(object): | ||
goal_time = 0.2 | ||
params = [['DataFrame', 'Series'], [True, False]] | ||
param_names = ['constructor', 'pct'] | ||
|
||
def setup(self): | ||
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) | ||
self.s = Series(self.values) | ||
|
||
def time_stats_rank_pct_average(self): | ||
self.s.rank(pct=True) | ||
|
||
def setup(self, constructor, pct): | ||
values = np.random.randn(10**5) | ||
self.data = getattr(pd, constructor)(values) | ||
|
||
class stats_rank_pct_average_old(object): | ||
goal_time = 0.2 | ||
def time_rank(self, constructor, pct): | ||
self.data.rank(pct=pct) | ||
|
||
def setup(self): | ||
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) | ||
self.s = Series(self.values) | ||
def time_average_old(self, constructor, pct): | ||
self.data.rank(pct=pct) / len(self.data) | ||
|
||
def time_stats_rank_pct_average_old(self): | ||
(self.s.rank() / len(self.s)) | ||
|
||
class Correlation(object): | ||
|
||
class stats_rolling_mean(object): | ||
goal_time = 0.2 | ||
params = ['spearman', 'kendall', 'pearson'] | ||
param_names = ['method'] | ||
|
||
def setup(self): | ||
self.arr = np.random.randn(100000) | ||
self.win = 100 | ||
|
||
def time_rolling_mean(self): | ||
rolling_mean(self.arr, self.win) | ||
|
||
def time_rolling_median(self): | ||
rolling_median(self.arr, self.win) | ||
|
||
def time_rolling_min(self): | ||
rolling_min(self.arr, self.win) | ||
|
||
def time_rolling_max(self): | ||
rolling_max(self.arr, self.win) | ||
|
||
def time_rolling_sum(self): | ||
rolling_sum(self.arr, self.win) | ||
|
||
def time_rolling_std(self): | ||
rolling_std(self.arr, self.win) | ||
|
||
def time_rolling_var(self): | ||
rolling_var(self.arr, self.win) | ||
|
||
def time_rolling_skew(self): | ||
rolling_skew(self.arr, self.win) | ||
def setup(self, method): | ||
self.df = pd.DataFrame(np.random.randn(1000, 30)) | ||
|
||
def time_rolling_kurt(self): | ||
rolling_kurt(self.arr, self.win) | ||
def time_corr(self, method): | ||
self.df.corr(method=method) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you could make use_bottleneck a param here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`use_bottleneck` is a param here, over `[True, False]`.