
CI: Check ASV for failed benchmarks #19236

Merged: 1 commit, Feb 8, 2018
8 changes: 8 additions & 0 deletions .travis.yml
@@ -73,6 +73,10 @@ matrix:
env:
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
# In allow_failures
- dist: trusty
Contributor: move this after the doc build

Member Author: @jorisvandenbossche preferred this before the doc build (#19236 (comment))

env:
- JOB="3.6_ASV" ASV=true
# In allow_failures
- dist: trusty
env:
- JOB="3.6_DOC" DOC=true
@@ -93,6 +97,9 @@ matrix:
- dist: trusty
env:
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
- dist: trusty
Contributor: same here

Member Author: @jorisvandenbossche preferred this before the doc build (#19236 (comment))

env:
- JOB="3.6_ASV" ASV=true
- dist: trusty
env:
- JOB="3.6_DOC" DOC=true
@@ -128,6 +135,7 @@ script:
- ci/script_single.sh
- ci/script_multi.sh
- ci/lint.sh
- ci/asv.sh
- echo "checking imports"
- source activate pandas && python ci/check_imports.py
- echo "script done"
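The new ci/asv.sh entry above runs the benchmark check, but the script itself is not part of this diff. As a rough sketch of the kind of check it performs, here is a hypothetical standalone Python equivalent (the merged script is a shell script; the structure below is an assumption, though asv run --quick and --show-stderr are real asv options):

import subprocess
import sys

# Run each benchmark once (--quick) and surface tracebacks from
# broken benchmarks (--show-stderr) inside the asv_bench directory.
proc = subprocess.run(
    ['asv', 'run', '--quick', '--show-stderr'],
    capture_output=True, text=True, cwd='asv_bench',
)
print(proc.stdout)
print(proc.stderr, file=sys.stderr)

# asv can report broken benchmarks as 'failed' in its output without
# returning a nonzero exit code, so check for that marker as well.
if proc.returncode != 0 or 'failed' in proc.stdout:
    sys.exit(1)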
4 changes: 3 additions & 1 deletion asv_bench/benchmarks/algorithms.py
@@ -1,3 +1,4 @@
import warnings
from importlib import import_module

import numpy as np
@@ -83,7 +84,8 @@ def setup(self):
self.all = self.uniques.repeat(10)

def time_match_string(self):
pd.match(self.all, self.uniques)
with warnings.catch_warnings(record=True):
pd.match(self.all, self.uniques)


class Hashing(object):
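Nearly every benchmark change in this diff uses the idiom shown above: wrap the deprecated call in warnings.catch_warnings(record=True). Entering the context manager saves the warning filters and the showwarning hook, and record=True swaps in a hook that appends each warning to a list instead of writing it to stderr, so deprecation noise is silenced for exactly the duration of the timed call and everything is restored on exit. A self-contained illustration (the simplefilter call is not part of the benchmark idiom; it just makes the example deterministic):

import warnings

def deprecated_call():
    # Stand-in for a deprecated pandas API such as pd.match.
    warnings.warn("this API is deprecated", FutureWarning)
    return 42

with warnings.catch_warnings(record=True) as caught:
    # Ensure every warning reaches the recorder regardless of any
    # filters installed earlier in the process.
    warnings.simplefilter("always")
    result = deprecated_call()  # nothing is printed to stderr

assert result == 42
assert caught[0].category is FutureWarning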
10 changes: 8 additions & 2 deletions asv_bench/benchmarks/categoricals.py
@@ -1,3 +1,5 @@
import warnings

import numpy as np
import pandas as pd
import pandas.util.testing as tm
@@ -119,11 +121,15 @@ def setup(self):

self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
self.s_str_cat = self.s_str.astype('category')
self.s_str_cat_ordered = self.s_str.astype('category', ordered=True)
with warnings.catch_warnings(record=True):
self.s_str_cat_ordered = self.s_str.astype('category',
ordered=True)

self.s_int = pd.Series(np.random.randint(0, ncats, size=N))
self.s_int_cat = self.s_int.astype('category')
self.s_int_cat_ordered = self.s_int.astype('category', ordered=True)
with warnings.catch_warnings(record=True):
self.s_int_cat_ordered = self.s_int.astype('category',
ordered=True)

def time_rank_string(self):
self.s_str.rank()
9 changes: 6 additions & 3 deletions asv_bench/benchmarks/frame_methods.py
@@ -1,4 +1,6 @@
import string
import warnings

import numpy as np
import pandas.util.testing as tm
from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
@@ -15,7 +17,8 @@ def setup(self):
self.df = DataFrame(np.random.randn(10000, 25))
self.df['foo'] = 'bar'
self.df['bar'] = 'baz'
self.df = self.df.consolidate()
with warnings.catch_warnings(record=True):
self.df = self.df.consolidate()

def time_frame_get_numeric_data(self):
self.df._get_numeric_data()
@@ -141,8 +144,8 @@ class Repr(object):
def setup(self):
nrows = 10000
data = np.random.randn(nrows, 10)
idx = MultiIndex.from_arrays(np.tile(np.random.randn(3, nrows / 100),
100))
arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100)
idx = MultiIndex.from_arrays(arrays)
self.df3 = DataFrame(data, index=idx)
self.df4 = DataFrame(data, index=np.random.randn(nrows))
self.df_tall = DataFrame(np.random.randn(nrows, 10))
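The Repr change above is a Python 3 compatibility fix rather than a warning shim: nrows / 100 is true division in Python 3 and yields a float, which NumPy rejects as an array dimension. A minimal illustration:

import numpy as np

nrows = 10000
# Python 3: 10000 / 100 == 100.0, and a float dimension raises
# "TypeError: 'float' object cannot be interpreted as an integer".
arr = np.random.randn(3, int(nrows / 100))  # explicit int: OK
# np.random.randn(3, nrows / 100)           # TypeError on Python 3
print(arr.shape)  # (3, 100)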
43 changes: 20 additions & 23 deletions asv_bench/benchmarks/gil.py
@@ -1,9 +1,13 @@
import numpy as np
import pandas.util.testing as tm
from pandas import (DataFrame, Series, rolling_median, rolling_mean,
rolling_min, rolling_max, rolling_var, rolling_skew,
rolling_kurt, rolling_std, read_csv, factorize, date_range)
Member: It might be 'overhead' to keep, but we can also simply move those imports to within the else statement, and then this keeps working for old releases.
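A sketch of the alternative the reviewer describes, reduced to its core (hypothetical, not what this diff merges): defer the legacy imports into the branch that needs them, so the module imports cleanly on any pandas version.

import numpy as np
from pandas import DataFrame

def make_parallel_rolling(arr, win):
    if hasattr(DataFrame, 'rolling'):
        # Modern pandas: method-based rolling API.
        df = DataFrame(arr).rolling(win)
        return lambda: df.mean()
    # Only reached on old pandas releases, so the import can never
    # fail at module-import time on versions where it is gone.
    from pandas import rolling_mean
    return lambda: rolling_mean(arr, win)

roll = make_parallel_rolling(np.random.rand(100000), 100)
roll()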

from pandas import DataFrame, Series, read_csv, factorize, date_range
from pandas.core.algorithms import take_1d
try:
from pandas import (rolling_median, rolling_mean, rolling_min, rolling_max,
rolling_var, rolling_skew, rolling_kurt, rolling_std)
have_rolling_methods = True
except ImportError:
have_rolling_methods = False
try:
from pandas._libs import algos
except ImportError:
@@ -171,8 +175,7 @@ def run(period):
class ParallelRolling(object):

goal_time = 0.2
params = ['rolling_median', 'rolling_mean', 'rolling_min', 'rolling_max',
'rolling_var', 'rolling_skew', 'rolling_kurt', 'rolling_std']
params = ['median', 'mean', 'min', 'max', 'var', 'skew', 'kurt', 'std']
param_names = ['method']

def setup(self, method):
@@ -181,34 +184,28 @@ def setup(self, method):
win = 100
arr = np.random.rand(100000)
if hasattr(DataFrame, 'rolling'):
rolling = {'rolling_median': 'median',
'rolling_mean': 'mean',
'rolling_min': 'min',
'rolling_max': 'max',
'rolling_var': 'var',
'rolling_skew': 'skew',
'rolling_kurt': 'kurt',
'rolling_std': 'std'}
df = DataFrame(arr).rolling(win)

@test_parallel(num_threads=2)
def parallel_rolling():
getattr(df, rolling[method])()
getattr(df, method)()
self.parallel_rolling = parallel_rolling
else:
rolling = {'rolling_median': rolling_median,
'rolling_mean': rolling_mean,
'rolling_min': rolling_min,
'rolling_max': rolling_max,
'rolling_var': rolling_var,
'rolling_skew': rolling_skew,
'rolling_kurt': rolling_kurt,
'rolling_std': rolling_std}
elif have_rolling_methods:
rolling = {'median': rolling_median,
'mean': rolling_mean,
'min': rolling_min,
'max': rolling_max,
'var': rolling_var,
'skew': rolling_skew,
'kurt': rolling_kurt,
'std': rolling_std}

@test_parallel(num_threads=2)
def parallel_rolling():
rolling[method](arr, win)
self.parallel_rolling = parallel_rolling
else:
raise NotImplementedError

def time_rolling(self, method):
self.parallel_rolling()
6 changes: 4 additions & 2 deletions asv_bench/benchmarks/groupby.py
@@ -1,3 +1,4 @@
import warnings
from string import ascii_letters
from itertools import product
from functools import partial
@@ -340,7 +341,8 @@ def time_dt_size(self):
self.df.groupby(['dates']).size()

def time_dt_timegrouper_size(self):
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
with warnings.catch_warnings(record=True):
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()

def time_category_size(self):
self.draws.groupby(self.cats).size()
@@ -467,7 +469,7 @@ class SumMultiLevel(object):

def setup(self):
N = 50
self.df = DataFrame({'A': range(N) * 2,
self.df = DataFrame({'A': list(range(N)) * 2,
'B': range(N * 2),
'C': 1}).set_index(['A', 'B'])

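The SumMultiLevel change above is another Python 3 fix: range is a lazy sequence in Python 3 and does not support repetition with *, so it has to be materialized as a list first.

# Python 2: range(3) * 2      -> [0, 1, 2, 0, 1, 2]
# Python 3: range(3) * 2      -> TypeError
doubled = list(range(3)) * 2  # works on both: [0, 1, 2, 0, 1, 2]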
19 changes: 13 additions & 6 deletions asv_bench/benchmarks/indexing.py
@@ -1,3 +1,5 @@
import warnings

import numpy as np
import pandas.util.testing as tm
from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index,
@@ -91,7 +93,8 @@ def time_getitem_pos_slice(self, index):
self.s[:80000]

def time_get_value(self, index):
self.s.get_value(self.lbl)
with warnings.catch_warnings(record=True):
self.s.get_value(self.lbl)

def time_getitem_scalar(self, index):
self.s[self.lbl]
@@ -112,7 +115,8 @@ def setup(self):
self.bool_obj_indexer = self.bool_indexer.astype(object)

def time_get_value(self):
self.df.get_value(self.idx_scalar, self.col_scalar)
with warnings.catch_warnings(record=True):
self.df.get_value(self.idx_scalar, self.col_scalar)

def time_ix(self):
self.df.ix[self.idx_scalar, self.col_scalar]
@@ -231,11 +235,13 @@ class PanelIndexing(object):
goal_time = 0.2

def setup(self):
self.p = Panel(np.random.randn(100, 100, 100))
self.inds = range(0, 100, 10)
with warnings.catch_warnings(record=True):
self.p = Panel(np.random.randn(100, 100, 100))
self.inds = range(0, 100, 10)

def time_subset(self):
self.p.ix[(self.inds, self.inds, self.inds)]
with warnings.catch_warnings(record=True):
self.p.ix[(self.inds, self.inds, self.inds)]


class MethodLookup(object):
@@ -295,7 +301,8 @@ def setup(self):
def time_insert(self):
np.random.seed(1234)
for i in range(100):
self.df.insert(0, i, np.random.randn(self.N))
self.df.insert(0, i, np.random.randn(self.N),
allow_duplicates=True)

def time_assign_with_setitem(self):
np.random.seed(1234)
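The time_insert change above passes allow_duplicates=True, presumably because asv invokes the timed function repeatedly against one setup call, so the same column labels get inserted more than once; without the flag the second round raises. A small demonstration of that behavior:

import numpy as np
from pandas import DataFrame

df = DataFrame(np.random.randn(4, 1))
df.insert(0, 'x', np.arange(4))
# Repeating the insert with the same label raises
# "ValueError: cannot insert x, already exists" unless duplicates
# are explicitly allowed:
df.insert(0, 'x', np.arange(4), allow_duplicates=True)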
21 changes: 13 additions & 8 deletions asv_bench/benchmarks/io/hdf.py
@@ -1,3 +1,5 @@
import warnings

import numpy as np
from pandas import DataFrame, Panel, date_range, HDFStore, read_hdf
import pandas.util.testing as tm
@@ -105,22 +107,25 @@ class HDFStorePanel(BaseIO):

def setup(self):
self.fname = '__test__.h5'
self.p = Panel(np.random.randn(20, 1000, 25),
items=['Item%03d' % i for i in range(20)],
major_axis=date_range('1/1/2000', periods=1000),
minor_axis=['E%03d' % i for i in range(25)])
self.store = HDFStore(self.fname)
self.store.append('p1', self.p)
with warnings.catch_warnings(record=True):
self.p = Panel(np.random.randn(20, 1000, 25),
items=['Item%03d' % i for i in range(20)],
major_axis=date_range('1/1/2000', periods=1000),
minor_axis=['E%03d' % i for i in range(25)])
self.store = HDFStore(self.fname)
self.store.append('p1', self.p)

def teardown(self):
self.store.close()
self.remove(self.fname)

def time_read_store_table_panel(self):
self.store.select('p1')
with warnings.catch_warnings(record=True):
self.store.select('p1')

def time_write_store_table_panel(self):
self.store.append('p2', self.p)
with warnings.catch_warnings(record=True):
self.store.append('p2', self.p)


class HDF(BaseIO):
23 changes: 16 additions & 7 deletions asv_bench/benchmarks/join_merge.py
@@ -1,3 +1,4 @@
import warnings
import string

import numpy as np
@@ -26,7 +27,8 @@ def setup(self):
self.mdf1['obj2'] = 'bar'
self.mdf1['int1'] = 5
try:
self.mdf1.consolidate(inplace=True)
with warnings.catch_warnings(record=True):
self.mdf1.consolidate(inplace=True)
except:
pass
self.mdf2 = self.mdf1.copy()
@@ -75,16 +77,23 @@ class ConcatPanels(object):
param_names = ['axis', 'ignore_index']

def setup(self, axis, ignore_index):
panel_c = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='C'))
self.panels_c = [panel_c] * 20
panel_f = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='F'))
self.panels_f = [panel_f] * 20
with warnings.catch_warnings(record=True):
panel_c = Panel(np.zeros((10000, 200, 2),
dtype=np.float32,
order='C'))
self.panels_c = [panel_c] * 20
panel_f = Panel(np.zeros((10000, 200, 2),
dtype=np.float32,
order='F'))
self.panels_f = [panel_f] * 20

def time_c_ordered(self, axis, ignore_index):
concat(self.panels_c, axis=axis, ignore_index=ignore_index)
with warnings.catch_warnings(record=True):
concat(self.panels_c, axis=axis, ignore_index=ignore_index)

def time_f_ordered(self, axis, ignore_index):
concat(self.panels_f, axis=axis, ignore_index=ignore_index)
with warnings.catch_warnings(record=True):
concat(self.panels_f, axis=axis, ignore_index=ignore_index)


class ConcatDataFrames(object):
7 changes: 5 additions & 2 deletions asv_bench/benchmarks/offset.py
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import warnings
from datetime import datetime

import numpy as np
@@ -76,7 +77,8 @@ def setup(self, offset):
self.data = pd.Series(rng)

def time_add_offset(self, offset):
self.data + offset
with warnings.catch_warnings(record=True):
self.data + offset


class OffsetDatetimeIndexArithmetic(object):
@@ -90,7 +92,8 @@ def setup(self, offset):
self.data = pd.date_range(start='1/1/2000', periods=N, freq='T')

def time_add_offset(self, offset):
self.data + offset
with warnings.catch_warnings(record=True):
self.data + offset


class OffestDatetimeArithmetic(object):
10 changes: 7 additions & 3 deletions asv_bench/benchmarks/panel_ctor.py
@@ -1,3 +1,4 @@
import warnings
from datetime import datetime, timedelta

from pandas import DataFrame, DatetimeIndex, date_range
@@ -19,7 +20,8 @@ def setup(self):
self.data_frames[x] = df

def time_from_dict(self):
Panel.from_dict(self.data_frames)
with warnings.catch_warnings(record=True):
Panel.from_dict(self.data_frames)


class SameIndexes(object):
@@ -34,7 +36,8 @@ def setup(self):
self.data_frames = dict(enumerate([df] * 100))

def time_from_dict(self):
Panel.from_dict(self.data_frames)
with warnings.catch_warnings(record=True):
Panel.from_dict(self.data_frames)


class TwoIndexes(object):
@@ -53,4 +56,5 @@ def setup(self):
self.data_frames = dict(enumerate(dfs))

def time_from_dict(self):
Panel.from_dict(self.data_frames)
with warnings.catch_warnings(record=True):
Panel.from_dict(self.data_frames)