Skip to content

Add asv benchmarks for essential functions #23935

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Nov 27, 2018
13 changes: 13 additions & 0 deletions asv_bench/benchmarks/binary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def setup(self):
np.iinfo(np.int16).max,
size=(N, N)))

self.s = Series(np.random.randn(N))

# Division

def time_frame_float_div(self):
Expand All @@ -74,6 +76,17 @@ def time_frame_int_mod(self):
def time_frame_float_mod(self):
self.df % self.df2

# Dot product

def time_frame_dot(self):
self.df.dot(self.df2)

def time_series_dot(self):
self.s.dot(self.s)

def time_frame_series_dot(self):
self.df.dot(self.s)


class Timeseries(object):

Expand Down
30 changes: 30 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,36 @@ def time_reindex_upcast(self):
self.df2.reindex(np.random.permutation(range(1200)))


class Rename(object):
    """Benchmarks for ``DataFrame.rename`` on the index, columns, or both."""

    def setup(self):
        """Create the frames and the identity label mapping for the timings."""
        n = 10**3
        self.df = DataFrame(np.random.randn(n * 10, n))

        # Identity mapping over the integer labels 4*n .. 7*n - 1.
        self.idx = np.arange(4 * n, 7 * n)
        self.dict_idx = dict(zip(self.idx, self.idx))

        # Each column is generated in four dtypes and one of them is kept
        # at random.
        mixed_columns = {}
        for col in range(n):
            candidates = {
                0: np.random.randint(0, 2, n).astype(np.bool_),
                1: np.random.randint(0, n, n).astype(np.int16),
                2: np.random.randint(0, n, n).astype(np.int32),
                3: np.random.randint(0, n, n).astype(np.int64),
            }
            mixed_columns[col] = candidates[np.random.randint(0, 4)]
        # NOTE(review): no timing method in this class reads ``df2``.
        self.df2 = DataFrame(mixed_columns)

    def time_rename_single(self):
        """Rename with a one-entry mapping passed positionally."""
        mapping = {0: 0}
        self.df.rename(mapping)

    def time_rename_axis0(self):
        """Rename with the full mapping passed positionally."""
        self.df.rename(self.dict_idx)

    def time_rename_axis1(self):
        """Rename the columns with the full mapping."""
        self.df.rename(columns=self.dict_idx)

    def time_rename_both_axes(self):
        """Rename index and columns in a single call."""
        mapping = self.dict_idx
        self.df.rename(index=mapping, columns=mapping)

    def time_dict_rename_both_axes(self):
        """Rename index and columns in a single call.

        NOTE(review): this issues the same call as
        ``time_rename_both_axes`` -- confirm whether a different input
        was intended.
        """
        mapping = self.dict_idx
        self.df.rename(index=mapping, columns=mapping)


class Iteration(object):

def setup(self):
Expand Down
53 changes: 42 additions & 11 deletions asv_bench/benchmarks/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,48 @@
matplotlib.use('Agg')


class Plotting(object):

def setup(self):
self.s = Series(np.random.randn(1000000))
self.df = DataFrame({'col': self.s})

def time_series_plot(self):
self.s.plot()

def time_frame_plot(self):
self.df.plot()
class SeriesPlotting(object):
    """Time ``Series.plot`` once per plot kind."""

    params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie']]
    param_names = ['kind']

    def setup(self, kind):
        """Build a random Series whose length depends on ``kind``."""
        if kind in ('bar', 'barh', 'pie'):
            size = 100
        elif kind == 'kde':
            size = 10000
        else:
            size = 1000000

        values = Series(np.random.randn(size))
        # 'area' and 'pie' are given absolute values.
        self.s = values.abs() if kind in ('area', 'pie') else values

    def time_series_plot(self, kind):
        """Plot the prepared Series with the requested kind."""
        self.s.plot(kind=kind)


class FramePlotting(object):
    """Time ``DataFrame.plot`` of column 'y' against 'x' per plot kind."""

    params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie', 'scatter',
               'hexbin']]
    param_names = ['kind']

    def setup(self, kind):
        """Build a two-column random frame whose length depends on ``kind``."""
        if kind in ('bar', 'barh', 'pie'):
            size = 100
        elif kind in ('kde', 'scatter', 'hexbin'):
            size = 10000
        else:
            size = 1000000

        x_values = Series(np.random.randn(size))
        y_values = Series(np.random.randn(size))
        # 'area' and 'pie' are given absolute values.
        if kind in ('area', 'pie'):
            x_values = x_values.abs()
            y_values = y_values.abs()

        self.x = x_values
        self.y = y_values
        self.df = DataFrame({'x': self.x, 'y': self.y})

    def time_frame_plot(self, kind):
        """Plot the prepared frame with the requested kind."""
        self.df.plot(x='x', y='y', kind=kind)


class TimeseriesPlotting(object):
Expand Down
38 changes: 38 additions & 0 deletions asv_bench/benchmarks/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,42 @@ def time_get_dummies_1d_sparse(self):
pd.get_dummies(self.s, sparse=True)


class Cut(object):
    """Time ``pd.cut`` and ``pd.qcut`` on four Series for several bin counts."""

    params = [[4, 10, 1000]]
    param_names = ['bins']

    def setup(self, bins):
        """Create the int, float, timedelta and datetime input Series."""
        size = 10**5
        # Each value of the int and float inputs is repeated five times.
        self.int_series = pd.Series(np.repeat(np.arange(size), 5))
        self.float_series = pd.Series(np.repeat(np.random.randn(size), 5))

        # Random integers given the timedelta64[ns] / datetime64[ns] dtype.
        timedelta_values = np.random.randint(size, size=size)
        self.timedelta_series = pd.Series(timedelta_values,
                                          dtype='timedelta64[ns]')
        datetime_values = np.random.randint(size, size=size)
        self.datetime_series = pd.Series(datetime_values,
                                         dtype='datetime64[ns]')

    def time_cut_int(self, bins):
        """``pd.cut`` on the integer Series."""
        pd.cut(self.int_series, bins)

    def time_cut_float(self, bins):
        """``pd.cut`` on the float Series."""
        pd.cut(self.float_series, bins)

    def time_cut_timedelta(self, bins):
        """``pd.cut`` on the timedelta Series."""
        pd.cut(self.timedelta_series, bins)

    def time_cut_datetime(self, bins):
        """``pd.cut`` on the datetime Series."""
        pd.cut(self.datetime_series, bins)

    def time_qcut_int(self, bins):
        """``pd.qcut`` on the integer Series."""
        pd.qcut(self.int_series, bins)

    def time_qcut_float(self, bins):
        """``pd.qcut`` on the float Series."""
        pd.qcut(self.float_series, bins)

    def time_qcut_timedelta(self, bins):
        """``pd.qcut`` on the timedelta Series."""
        pd.qcut(self.timedelta_series, bins)

    def time_qcut_datetime(self, bins):
        """``pd.qcut`` on the datetime Series."""
        pd.qcut(self.datetime_series, bins)


from .pandas_vb_common import setup # noqa: F401
36 changes: 36 additions & 0 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,42 @@ def time_rolling(self, constructor, window, dtype, method):
getattr(self.roll, method)()


class ExpandingMethods(object):
    """Time the aggregation methods of an expanding window.

    Parameterized over the pandas constructor name, the array dtype and
    the name of the expanding-window method to call.
    """

    sample_time = 0.2
    params = (['DataFrame', 'Series'],
              ['int', 'float'],
              ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
               'sum'])
    # One name per entry in ``params``, in the same order. There is no
    # window parameter here: ``setup`` takes only these three arguments.
    param_names = ['constructor', 'dtype', 'method']

    def setup(self, constructor, dtype, method):
        """Build the expanding window over random data scaled by 100."""
        N = 10**5
        arr = (100 * np.random.random(N)).astype(dtype)
        # ``constructor`` is looked up by name on the pandas namespace.
        self.expanding = getattr(pd, constructor)(arr).expanding()

    def time_expanding(self, constructor, dtype, method):
        """Call the expanding-window method named by ``method``."""
        getattr(self.expanding, method)()


class EWMMethods(object):
    """Time the aggregation methods of an exponentially weighted window.

    Parameterized over the pandas constructor name, the window value
    (passed to ``ewm`` as ``halflife``), the array dtype and the name of
    the method to call.
    """

    sample_time = 0.2
    params = (['DataFrame', 'Series'],
              [10, 1000],
              ['int', 'float'],
              ['mean', 'std'])
    # One name per entry in ``params``, matching the ``setup`` arguments.
    param_names = ['constructor', 'window', 'dtype', 'method']

    def setup(self, constructor, window, dtype, method):
        """Build the EWM object over random data scaled by 100."""
        N = 10**5
        arr = (100 * np.random.random(N)).astype(dtype)
        # ``constructor`` is looked up by name on the pandas namespace.
        self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)

    def time_ewm(self, constructor, window, dtype, method):
        """Call the EWM method named by ``method``."""
        getattr(self.ewm, method)()


class VariableWindowMethods(Methods):
sample_time = 0.2
params = (['DataFrame', 'Series'],
Expand Down
36 changes: 32 additions & 4 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,42 @@ def time_average_old(self, constructor, pct):

class Correlation(object):

params = ['spearman', 'kendall', 'pearson']
param_names = ['method']
params = [['spearman', 'kendall', 'pearson'], [True, False]]
param_names = ['method', 'use_bottleneck']

def setup(self, method):
def setup(self, method, use_bottleneck):
try:
pd.options.compute.use_bottleneck = use_bottleneck
except TypeError:
from pandas.core import nanops
nanops._USE_BOTTLENECK = use_bottleneck
self.df = pd.DataFrame(np.random.randn(1000, 30))
self.s = pd.Series(np.random.randn(1000))
self.s2 = pd.Series(np.random.randn(1000))

def time_corr(self, method):
def time_corr(self, method, use_bottleneck):
self.df.corr(method=method)

def time_corr_series(self, method, use_bottleneck):
self.s.corr(self.s2, method=method)


class Covariance(object):
    """Time ``Series.cov`` with the bottleneck option switched on and off."""

    params = [[True, False]]
    param_names = ['use_bottleneck']

    def setup(self, use_bottleneck):
        """Create two random Series and apply the bottleneck setting."""
        n_obs = 100000
        self.s = pd.Series(np.random.randn(n_obs))
        self.s2 = pd.Series(np.random.randn(n_obs))

        try:
            pd.options.compute.use_bottleneck = use_bottleneck
        except TypeError:
            # Fall back to setting the flag on the nanops module directly.
            from pandas.core import nanops
            nanops._USE_BOTTLENECK = use_bottleneck

    def time_cov_series(self, use_bottleneck):
        """Covariance between the two prepared Series."""
        self.s.cov(self.s2)


from .pandas_vb_common import setup # noqa: F401
30 changes: 30 additions & 0 deletions asv_bench/benchmarks/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,42 @@ def time_extract(self):
def time_findall(self):
self.s.str.findall('[A-Z]+')

def time_find(self):
self.s.str.find('[A-Z]+')

def time_rfind(self):
self.s.str.rfind('[A-Z]+')

def time_get(self):
self.s.str.get(0)

def time_len(self):
self.s.str.len()

def time_join(self):
self.s.str.join(' ')

def time_match(self):
self.s.str.match('A')

def time_normalize(self):
self.s.str.normalize('NFC')

def time_pad(self):
self.s.str.pad(100, side='both')

def time_partition(self):
self.s.str.partition('A')

def time_rpartition(self):
self.s.str.rpartition('A')

def time_replace(self):
self.s.str.replace('A', '\x01\x01')

def time_translate(self):
    """Time ``str.translate`` replacing each 'A' with two \\x01 characters."""
    # Translation tables are looked up by Unicode ordinal, so the key must
    # be ord('A'); a table keyed by the string 'A' never matches and the
    # call would translate nothing.
    self.s.str.translate({ord('A'): '\x01\x01'})

def time_slice(self):
self.s.str.slice(5, 15, 2)

Expand All @@ -65,6 +86,12 @@ def time_upper(self):
def time_lower(self):
self.s.str.lower()

def time_wrap(self):
self.s.str.wrap(10)

def time_zfill(self):
self.s.str.zfill(10)


class Repeat(object):

Expand Down Expand Up @@ -129,6 +156,9 @@ def setup(self, expand):
def time_split(self, expand):
self.s.str.split('--', expand=expand)

def time_rsplit(self, expand):
self.s.str.rsplit('--', expand=expand)


class Dummies(object):

Expand Down
36 changes: 35 additions & 1 deletion asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import datetime

import numpy as np
from pandas import Series, timedelta_range, to_timedelta, Timestamp, Timedelta
from pandas import Series, timedelta_range, to_timedelta, Timestamp, \
Timedelta, TimedeltaIndex, DataFrame


class TimedeltaConstructor(object):
Expand Down Expand Up @@ -116,3 +117,36 @@ def time_timedelta_microseconds(self, series):

def time_timedelta_nanoseconds(self, series):
series.dt.nanoseconds


class TimedeltaIndexing(object):
    """Time lookup, copy, alignment and set operations on a TimedeltaIndex."""

    def setup(self):
        """Create two 1000-element daily indexes and a Series on the first."""
        # timedelta_range replaces the TimedeltaIndex(start=..., periods=...,
        # freq=...) constructor form, which pandas deprecated in 0.24 and
        # later removed.
        self.index = timedelta_range(start='1985', periods=1000, freq='D')
        self.index2 = timedelta_range(start='1986', periods=1000, freq='D')
        self.series = Series(range(1000), index=self.index)
        # Element from the middle of the index, used as the lookup key.
        self.timedelta = self.index[500]

    def time_get_loc(self):
        """Position lookup of a single timedelta in the index."""
        self.index.get_loc(self.timedelta)

    def time_shape(self):
        """Access the ``shape`` attribute of the index."""
        self.index.shape

    def time_shallow_copy(self):
        """Call the private ``_shallow_copy`` method of the index."""
        self.index._shallow_copy()

    def time_series_loc(self):
        """Label-based lookup of a single timedelta in the Series."""
        self.series.loc[self.timedelta]

    def time_align(self):
        """Build a frame from the Series and its first 500 elements."""
        DataFrame({'a': self.series, 'b': self.series[:500]})

    def time_intersection(self):
        """Intersection of the two indexes."""
        self.index.intersection(self.index2)

    def time_union(self):
        """Union of the two indexes."""
        self.index.union(self.index2)

    def time_unique(self):
        """Unique values of the index."""
        self.index.unique()