Skip to content

CLN: ASV binary ops benchmark #18444

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 25, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 44 additions & 47 deletions asv_bench/benchmarks/binary_ops.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
from .pandas_vb_common import *
import numpy as np
from pandas import DataFrame, Series, date_range
try:
import pandas.core.computation.expressions as expr
except ImportError:
import pandas.computation.expressions as expr


class Ops(object):

goal_time = 0.2

params = [[True, False], ['default', 1]]
param_names = ['use_numexpr', 'threads']

def setup(self, use_numexpr, threads):
np.random.seed(1234)
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))

Expand All @@ -20,94 +23,88 @@ def setup(self, use_numexpr, threads):
if not use_numexpr:
expr.set_use_numexpr(False)


def time_frame_add(self, use_numexpr, threads):
(self.df + self.df2)
self.df + self.df2

def time_frame_mult(self, use_numexpr, threads):
(self.df * self.df2)
self.df * self.df2

def time_frame_multi_and(self, use_numexpr, threads):
self.df[((self.df > 0) & (self.df2 > 0))]
self.df[(self.df > 0) & (self.df2 > 0)]

def time_frame_comparison(self, use_numexpr, threads):
(self.df > self.df2)
self.df > self.df2

def teardown(self, use_numexpr, threads):
expr.set_use_numexpr(True)
expr.set_numexpr_threads()


class Ops2(object):

goal_time = 0.2

def setup(self):
self.df = DataFrame(np.random.randn(1000, 1000))
self.df2 = DataFrame(np.random.randn(1000, 1000))
N = 10**3
np.random.seed(1234)
self.df = DataFrame(np.random.randn(N, N))
self.df2 = DataFrame(np.random.randn(N, N))

self.df_int = DataFrame(
np.random.random_integers(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(1000, 1000)))
self.df2_int = DataFrame(
np.random.random_integers(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(1000, 1000)))
self.df_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(N, N)))
self.df2_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(N, N)))

## Division
# Division

def time_frame_float_div(self):
(self.df // self.df2)
self.df // self.df2

def time_frame_float_div_by_zero(self):
(self.df / 0)
self.df / 0

def time_frame_float_floor_by_zero(self):
(self.df // 0)
self.df // 0

def time_frame_int_div_by_zero(self):
(self.df_int / 0)
self.df_int / 0

## Modulo
# Modulo

def time_frame_int_mod(self):
(self.df / self.df2)
self.df_int % self.df2_int

def time_frame_float_mod(self):
(self.df / self.df2)
self.df % self.df2


class Timeseries(object):

goal_time = 0.2

def setup(self):
self.N = 1000000
params = [None, 'US/Eastern']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

http://asv.readthedocs.io/en/latest/writing_benchmarks.html

We can add `param_names` here and elsewhere (just for readability).

param_names = ['tz']

def setup(self, tz):
self.N = 10**6
self.halfway = ((self.N // 2) - 1)
self.s = Series(date_range('20010101', periods=self.N, freq='T'))
self.s = Series(date_range('20010101', periods=self.N, freq='T',
tz=tz))
self.ts = self.s[self.halfway]

self.s2 = Series(date_range('20010101', periods=self.N, freq='s'))
self.s2 = Series(date_range('20010101', periods=self.N, freq='s',
tz=tz))

def time_series_timestamp_compare(self):
(self.s <= self.ts)
def time_series_timestamp_compare(self, tz):
self.s <= self.ts

def time_timestamp_series_compare(self):
(self.ts >= self.s)
def time_timestamp_series_compare(self, tz):
self.ts >= self.s

def time_timestamp_ops_diff1(self):
def time_timestamp_ops_diff(self, tz):
self.s2.diff()

def time_timestamp_ops_diff2(self):
(self.s - self.s.shift())



class TimeseriesTZ(Timeseries):

def setup(self):
self.N = 1000000
self.halfway = ((self.N // 2) - 1)
self.s = Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern'))
self.ts = self.s[self.halfway]

self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))
def time_timestamp_ops_diff_with_shift(self, tz):
self.s - self.s.shift()