Commit 7e837ee

CI: Run ASV on Travis for failed benchmarks
create asv.sh
add to travis
Changed file permission
Add else statement and asv machine config
move to separate travis build
CI: Run ASV on Travis for failed benchmarks
create asv.sh
add to travis
move to separate travis build
Change build order and change to asv dev
Put asv before doc
Fix some failing benchmarks
Grep for failed benchmarks
Add travis wait
Don't grep output yet
Remove travis_wait for now
fix some benchmarks
Now grep output
add travis wait 40 to ci/asv.sh
Add tee
Remove wait
Start catching warnings
catch more warnings
Fix ci strategy
1 parent 69cd5fb commit 7e837ee

21 files changed (+169 -40 lines)

.travis.yml (+8)

@@ -73,6 +73,10 @@ matrix:
       env:
         - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
     # In allow_failures
+    - dist: trusty
+      env:
+        - JOB="3.6_ASV" ASV=true
+    # In allow_failures
     - dist: trusty
       env:
         - JOB="3.6_DOC" DOC=true
@@ -93,6 +97,9 @@ matrix:
     - dist: trusty
       env:
         - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
+    - dist: trusty
+      env:
+        - JOB="3.6_ASV" ASV=true
     - dist: trusty
       env:
         - JOB="3.6_DOC" DOC=true
@@ -128,6 +135,7 @@ script:
   - ci/script_single.sh
   - ci/script_multi.sh
   - ci/lint.sh
+  - ci/asv.sh
   - echo "checking imports"
   - source activate pandas && python ci/check_imports.py
   - echo "script done"

asv_bench/benchmarks/algorithms.py (+3 -1)

@@ -1,3 +1,4 @@
+import warnings
 from importlib import import_module
 
 import numpy as np
@@ -83,7 +84,8 @@ def setup(self):
         self.all = self.uniques.repeat(10)
 
     def time_match_string(self):
-        pd.match(self.all, self.uniques)
+        with warnings.catch_warnings(record=True):
+            pd.match(self.all, self.uniques)
 
 
 class Hashing(object):
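
Nearly every benchmark change below follows the same pattern as time_match_string here: the call being timed now emits a FutureWarning or DeprecationWarning on pandas master (pd.match, consolidate, Panel, .ix, get_value, ...), and the fix wraps it in warnings.catch_warnings(record=True) so the warnings are collected into a list instead of reaching the benchmark output. A minimal standalone sketch of that mechanism (illustrative only, not code from this commit):

import warnings

# With record=True, any warning raised inside the block is appended to the
# returned list rather than being printed or escalated by an active filter.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")  # make sure the warning is actually emitted
    warnings.warn("this API is deprecated", FutureWarning)

print(len(caught))         # 1
print(caught[0].category)  # <class 'FutureWarning'>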

asv_bench/benchmarks/categoricals.py (+8 -2)

@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
@@ -119,11 +121,15 @@ def setup(self):
 
         self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
         self.s_str_cat = self.s_str.astype('category')
-        self.s_str_cat_ordered = self.s_str.astype('category', ordered=True)
+        with warnings.catch_warnings(record=True):
+            self.s_str_cat_ordered = self.s_str.astype('category',
+                                                       ordered=True)
 
         self.s_int = pd.Series(np.random.randint(0, ncats, size=N))
         self.s_int_cat = self.s_int.astype('category')
-        self.s_int_cat_ordered = self.s_int.astype('category', ordered=True)
+        with warnings.catch_warnings(record=True):
+            self.s_int_cat_ordered = self.s_int.astype('category',
+                                                       ordered=True)
 
     def time_rank_string(self):
         self.s_str.rank()
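
The warning silenced here comes from passing keyword arguments such as ordered=True through astype('category', ...), which pandas deprecated in favour of an explicit CategoricalDtype. A small illustration of the non-deprecated spelling (assumes pandas >= 0.21; not part of this commit):

import pandas as pd
from pandas.api.types import CategoricalDtype

s = pd.Series(list('abca'))

# Equivalent to the deprecated s.astype('category', ordered=True)
s_ordered = s.astype(CategoricalDtype(ordered=True))
print(s_ordered.cat.ordered)  # True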

asv_bench/benchmarks/frame_methods.py (+6 -3)

@@ -1,4 +1,6 @@
 import string
+import warnings
+
 import numpy as np
 import pandas.util.testing as tm
 from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
@@ -15,7 +17,8 @@ def setup(self):
         self.df = DataFrame(np.random.randn(10000, 25))
         self.df['foo'] = 'bar'
         self.df['bar'] = 'baz'
-        self.df = self.df.consolidate()
+        with warnings.catch_warnings(record=True):
+            self.df = self.df.consolidate()
 
     def time_frame_get_numeric_data(self):
         self.df._get_numeric_data()
@@ -141,8 +144,8 @@ class Repr(object):
     def setup(self):
         nrows = 10000
         data = np.random.randn(nrows, 10)
-        idx = MultiIndex.from_arrays(np.tile(np.random.randn(3, nrows / 100),
-                                             100))
+        arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100)
+        idx = MultiIndex.from_arrays(arrays)
         self.df3 = DataFrame(data, index=idx)
         self.df4 = DataFrame(data, index=np.random.randn(nrows))
         self.df_tall = DataFrame(np.random.randn(nrows, 10))
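
The Repr.setup change is a Python 3 compatibility fix rather than a warnings fix: under Python 3, nrows / 100 is true division and returns a float, which newer NumPy rejects as an array dimension. A short sketch of the corrected construction (illustrative only):

import numpy as np
from pandas import MultiIndex

nrows = 10000
# int() keeps the dimension integral under Python 3's true division.
arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100)
idx = MultiIndex.from_arrays(arrays)
print(len(idx))  # 10000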

asv_bench/benchmarks/groupby.py (+4 -2)

@@ -1,3 +1,4 @@
+import warnings
 from string import ascii_letters
 from itertools import product
 from functools import partial
@@ -340,7 +341,8 @@ def time_dt_size(self):
         self.df.groupby(['dates']).size()
 
     def time_dt_timegrouper_size(self):
-        self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
+        with warnings.catch_warnings(record=True):
+            self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
 
     def time_category_size(self):
         self.draws.groupby(self.cats).size()
@@ -467,7 +469,7 @@ class SumMultiLevel(object):
 
     def setup(self):
         N = 50
-        self.df = DataFrame({'A': range(N) * 2,
+        self.df = DataFrame({'A': list(range(N)) * 2,
                              'B': range(N * 2),
                              'C': 1}).set_index(['A', 'B'])
 
asv_bench/benchmarks/indexing.py (+13 -6)

@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 import pandas.util.testing as tm
 from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index,
@@ -91,7 +93,8 @@ def time_getitem_pos_slice(self, index):
         self.s[:80000]
 
     def time_get_value(self, index):
-        self.s.get_value(self.lbl)
+        with warnings.catch_warnings(record=True):
+            self.s.get_value(self.lbl)
 
     def time_getitem_scalar(self, index):
         self.s[self.lbl]
@@ -112,7 +115,8 @@ def setup(self):
         self.bool_obj_indexer = self.bool_indexer.astype(object)
 
     def time_get_value(self):
-        self.df.get_value(self.idx_scalar, self.col_scalar)
+        with warnings.catch_warnings(record=True):
+            self.df.get_value(self.idx_scalar, self.col_scalar)
 
     def time_ix(self):
         self.df.ix[self.idx_scalar, self.col_scalar]
@@ -231,11 +235,13 @@ class PanelIndexing(object):
     goal_time = 0.2
 
     def setup(self):
-        self.p = Panel(np.random.randn(100, 100, 100))
-        self.inds = range(0, 100, 10)
+        with warnings.catch_warnings(record=True):
+            self.p = Panel(np.random.randn(100, 100, 100))
+            self.inds = range(0, 100, 10)
 
     def time_subset(self):
-        self.p.ix[(self.inds, self.inds, self.inds)]
+        with warnings.catch_warnings(record=True):
+            self.p.ix[(self.inds, self.inds, self.inds)]
 
 
 class MethodLookup(object):
@@ -295,7 +301,8 @@ def setup(self):
     def time_insert(self):
         np.random.seed(1234)
        for i in range(100):
-            self.df.insert(0, i, np.random.randn(self.N))
+            self.df.insert(0, i, np.random.randn(self.N),
+                           allow_duplicates=True)
 
     def time_assign_with_setitem(self):
         np.random.seed(1234)
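
The time_insert change is not about warnings either: the timing loop calls the method repeatedly on the same fixture, so later runs presumably try to insert column labels that already exist and pandas raises ValueError ("cannot insert ..., already exists"). Passing allow_duplicates=True lets the repeated inserts go through. A minimal sketch with hypothetical data, not taken from this commit:

import numpy as np
from pandas import DataFrame

df = DataFrame(np.random.randn(5, 1), columns=['x'])

# Without allow_duplicates=True the second insert of the same label raises.
df.insert(0, 0, np.random.randn(5), allow_duplicates=True)
df.insert(0, 0, np.random.randn(5), allow_duplicates=True)
print(df.columns.tolist())  # [0, 0, 'x']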

asv_bench/benchmarks/io/hdf.py (+7 -4)

@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 from pandas import DataFrame, Panel, date_range, HDFStore, read_hdf
 import pandas.util.testing as tm
@@ -105,10 +107,11 @@ class HDFStorePanel(BaseIO):
 
     def setup(self):
         self.fname = '__test__.h5'
-        self.p = Panel(np.random.randn(20, 1000, 25),
-                       items=['Item%03d' % i for i in range(20)],
-                       major_axis=date_range('1/1/2000', periods=1000),
-                       minor_axis=['E%03d' % i for i in range(25)])
+        with warnings.catch_warnings(record=True):
+            self.p = Panel(np.random.randn(20, 1000, 25),
+                           items=['Item%03d' % i for i in range(20)],
+                           major_axis=date_range('1/1/2000', periods=1000),
+                           minor_axis=['E%03d' % i for i in range(25)])
         self.store = HDFStore(self.fname)
         self.store.append('p1', self.p)
 

asv_bench/benchmarks/join_merge.py (+12 -5)

@@ -1,3 +1,4 @@
+import warnings
 import string
 
 import numpy as np
@@ -26,7 +27,8 @@ def setup(self):
         self.mdf1['obj2'] = 'bar'
         self.mdf1['int1'] = 5
         try:
-            self.mdf1.consolidate(inplace=True)
+            with warnings.catch_warnings(record=True):
+                self.mdf1.consolidate(inplace=True)
         except:
             pass
         self.mdf2 = self.mdf1.copy()
@@ -75,10 +77,15 @@ class ConcatPanels(object):
     param_names = ['axis', 'ignore_index']
 
     def setup(self, axis, ignore_index):
-        panel_c = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='C'))
-        self.panels_c = [panel_c] * 20
-        panel_f = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='F'))
-        self.panels_f = [panel_f] * 20
+        with warnings.catch_warnings(record=True):
+            panel_c = Panel(np.zeros((10000, 200, 2),
+                                     dtype=np.float32,
+                                     order='C'))
+            self.panels_c = [panel_c] * 20
+            panel_f = Panel(np.zeros((10000, 200, 2),
+                                     dtype=np.float32,
+                                     order='F'))
+            self.panels_f = [panel_f] * 20
 
     def time_c_ordered(self, axis, ignore_index):
         concat(self.panels_c, axis=axis, ignore_index=ignore_index)

asv_bench/benchmarks/offset.py (+5 -2)

@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import warnings
 from datetime import datetime
 
 import numpy as np
@@ -76,7 +77,8 @@ def setup(self, offset):
         self.data = pd.Series(rng)
 
     def time_add_offset(self, offset):
-        self.data + offset
+        with warnings.catch_warnings(record=True):
+            self.data + offset
 
 
 class OffsetDatetimeIndexArithmetic(object):
@@ -90,7 +92,8 @@ def setup(self, offset):
         self.data = pd.date_range(start='1/1/2000', periods=N, freq='T')
 
     def time_add_offset(self, offset):
-        self.data + offset
+        with warnings.catch_warnings(record=True):
+            self.data + offset
 
 
 class OffestDatetimeArithmetic(object):

asv_bench/benchmarks/panel_ctor.py (+7 -3)

@@ -1,3 +1,4 @@
+import warnings
 from datetime import datetime, timedelta
 
 from pandas import DataFrame, DatetimeIndex, date_range
@@ -19,7 +20,8 @@ def setup(self):
         self.data_frames[x] = df
 
     def time_from_dict(self):
-        Panel.from_dict(self.data_frames)
+        with warnings.catch_warnings(record=True):
+            Panel.from_dict(self.data_frames)
 
 
 class SameIndexes(object):
@@ -34,7 +36,8 @@ def setup(self):
         self.data_frames = dict(enumerate([df] * 100))
 
     def time_from_dict(self):
-        Panel.from_dict(self.data_frames)
+        with warnings.catch_warnings(record=True):
+            Panel.from_dict(self.data_frames)
 
 
 class TwoIndexes(object):
@@ -53,4 +56,5 @@ def setup(self):
         self.data_frames = dict(enumerate(dfs))
 
     def time_from_dict(self):
-        Panel.from_dict(self.data_frames)
+        with warnings.catch_warnings(record=True):
+            Panel.from_dict(self.data_frames)

asv_bench/benchmarks/panel_methods.py (+8 -3)

@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 
 from .pandas_vb_common import Panel, setup # noqa
@@ -10,10 +12,13 @@ class PanelMethods(object):
     param_names = ['axis']
 
     def setup(self, axis):
-        self.panel = Panel(np.random.randn(100, 1000, 100))
+        with warnings.catch_warnings(record=True):
+            self.panel = Panel(np.random.randn(100, 1000, 100))
 
     def time_pct_change(self, axis):
-        self.panel.pct_change(1, axis=axis)
+        with warnings.catch_warnings(record=True):
+            self.panel.pct_change(1, axis=axis)
 
     def time_shift(self, axis):
-        self.panel.shift(1, axis=axis)
+        with warnings.catch_warnings(record=True):
+            self.panel.shift(1, axis=axis)

asv_bench/benchmarks/reindex.py (-4)

@@ -167,10 +167,6 @@ def setup(self):
         col_array2 = col_array.copy()
         col_array2[:, :10000] = np.nan
         self.col_array_list = list(col_array)
-        self.col_array_list2 = list(col_array2)
 
     def time_lib_fast_zip(self):
         lib.fast_zip(self.col_array_list)
-
-    def time_lib_fast_zip_fillna(self):
-        lib.fast_zip_fillna(self.col_array_list2)

asv_bench/benchmarks/reshape.py (+2 -2)

@@ -104,9 +104,9 @@ def setup(self):
         self.letters = list('ABCD')
         yrvars = [l + str(num)
                   for l, num in product(self.letters, range(1, nyrs + 1))]
-
+        columns = [str(i) for i in range(nidvars)] + yrvars
         self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)),
-                            columns=list(range(nidvars)) + yrvars)
+                            columns=columns)
         self.df['id'] = self.df.index
 
     def time_wide_to_long_big(self):

asv_bench/benchmarks/strings.py (+4 -1)

@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 from pandas import Series
 import pandas.util.testing as tm
@@ -23,7 +25,8 @@ def time_endswith(self):
         self.s.str.endswith('A')
 
     def time_extract(self):
-        self.s.str.extract('(\\w*)A(\\w*)')
+        with warnings.catch_warnings(record=True):
+            self.s.str.extract('(\\w*)A(\\w*)')
 
     def time_findall(self):
         self.s.str.findall('[A-Z]+')

asv_bench/benchmarks/timeseries.py (+4 -2)

@@ -1,3 +1,4 @@
+import warnings
 from datetime import timedelta
 
 import numpy as np
@@ -74,7 +75,8 @@ def setup(self):
                                               freq='S'))
 
     def time_infer_dst(self):
-        self.index.tz_localize('US/Eastern', infer_dst=True)
+        with warnings.catch_warnings(record=True):
+            self.index.tz_localize('US/Eastern', infer_dst=True)
 
 
 class ResetIndex(object):
@@ -365,7 +367,7 @@ class ToDatetimeCache(object):
 
     def setup(self, cache):
         N = 10000
-        self.unique_numeric_seconds = range(N)
+        self.unique_numeric_seconds = list(range(N))
         self.dup_numeric_seconds = [1000] * N
         self.dup_string_dates = ['2000-02-11'] * N
         self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * N
