pandas-dev · jreback · Jan 6, 2018 · Jan 4, 2018 · Jan 4, 2018 · Jan 6, 2018
diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py
@@ -1,27 +1,29 @@
 import os
-from pandas import *
-import pandas as pd
-from numpy.random import randn
-from numpy.random import randint
-import pandas.util.testing as tm
-import random
-import numpy as np
-import threading
 from importlib import import_module
 
+import numpy as np
 try:
-    from pandas.compat import range
+    from pandas import Panel
 except ImportError:
-    pass
+    from pandas import WidePanel as Panel  # noqa
+
+# Compatibility import for lib: use the first candidate that imports
+for imp in ['pandas._libs.lib', 'pandas.lib']:
+    try:
+        lib = import_module(imp)
+        break
+    except ImportError:  # candidate unavailable; try the next one
+        pass
 
 numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
                   np.float64, np.int16, np.int8, np.uint16, np.uint8]
 datetime_dtypes = [np.datetime64, np.timedelta64]
 
-# This function just needs to be imported into each benchmark file in order to
-# sets up the random seed before each function.
-# http://asv.readthedocs.io/en/latest/writing_benchmarks.html
+
 def setup(*args, **kwargs):
+    # This function just needs to be imported into each benchmark file to
+    # set up the random seed before each function.
+    # http://asv.readthedocs.io/en/latest/writing_benchmarks.html
     np.random.seed(1234)
 
 
@@ -42,22 +44,3 @@ def remove(self, f):
 
     def teardown(self, *args, **kwargs):
         self.remove(self.fname)
-
-# Compatibility import for lib
-for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']:
-    try:
-        lib = import_module(imp)
-        break
-    except:
-        pass
-
-try:
-    Panel = Panel
-except Exception:
-    Panel = WidePanel
-
-# didn't add to namespace until later
-try:
-    from pandas.core.index import MultiIndex
-except ImportError:
-    pass
diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
@@ -1,119 +1,144 @@
-from .pandas_vb_common import *
-import string
-import itertools as IT
-import pandas.util.testing as testing
+import numpy as np
+from pandas import Series
+import pandas.util.testing as tm
 
 
-class StringMethods(object):
-    goal_time = 0.2
+class Methods(object):
 
-    def make_series(self, letters, strlen, size):
-        return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))])
+    goal_time = 0.2
 
     def setup(self):
-        self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000)
-        self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000)
-        self.s = self.make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|')
+        self.s = Series(tm.makeStringIndex(10**5))
 
     def time_cat(self):
-        self.many.str.cat(sep=',')
+        self.s.str.cat(sep=',')
 
     def time_center(self):
-        self.many.str.center(100)
-
-    def time_contains_few(self):
-        self.few.str.contains('matchthis')
-
-    def time_contains_few_noregex(self):
-        self.few.str.contains('matchthis', regex=False)
-
-    def time_contains_many(self):
-        self.many.str.contains('matchthis')
-
-    def time_contains_many_noregex(self):
-        self.many.str.contains('matchthis', regex=False)
+        self.s.str.center(100)
 
     def time_count(self):
-        self.many.str.count('matchthis')
+        self.s.str.count('A')
 
     def time_endswith(self):
-        self.many.str.endswith('matchthis')
+        self.s.str.endswith('A')
 
     def time_extract(self):
-        self.many.str.extract('(\\w*)matchthis(\\w*)')
+        self.s.str.extract('(\\w*)A(\\w*)')
 
     def time_findall(self):
-        self.many.str.findall('[A-Z]+')
+        self.s.str.findall('[A-Z]+')
 
     def time_get(self):
-        self.many.str.get(0)
-
-    def time_join_split(self):
-        self.many.str.join('--').str.split('--')
-
-    def time_join_split_expand(self):
-        self.many.str.join('--').str.split('--', expand=True)
+        self.s.str.get(0)
 
     def time_len(self):
-        self.many.str.len()
+        self.s.str.len()
 
     def time_match(self):
-        self.many.str.match('mat..this')
+        self.s.str.match('A')
 
     def time_pad(self):
-        self.many.str.pad(100, side='both')
-
-    def time_repeat(self):
-        self.many.str.repeat(list(IT.islice(IT.cycle(range(1, 4)), len(self.many))))
+        self.s.str.pad(100, side='both')
 
     def time_replace(self):
-        self.many.str.replace('(matchthis)', '\x01\x01')
+        self.s.str.replace('A', '\x01\x01')
 
     def time_slice(self):
-        self.many.str.slice(5, 15, 2)
+        self.s.str.slice(5, 15, 2)
 
     def time_startswith(self):
-        self.many.str.startswith('matchthis')
+        self.s.str.startswith('A')
 
     def time_strip(self):
-        self.many.str.strip('matchthis')
+        self.s.str.strip('A')
 
     def time_rstrip(self):
-        self.many.str.rstrip('matchthis')
+        self.s.str.rstrip('A')
 
     def time_lstrip(self):
-        self.many.str.lstrip('matchthis')
+        self.s.str.lstrip('A')
 
     def time_title(self):
-        self.many.str.title()
+        self.s.str.title()
 
     def time_upper(self):
-        self.many.str.upper()
+        self.s.str.upper()
 
     def time_lower(self):
-        self.many.str.lower()
+        self.s.str.lower()
+
+
+class Repeat(object):
+
+    goal_time = 0.2
+    params = ['int', 'array']
+    param_names = ['repeats']
+
+    def setup(self, repeats):
+        N = 10**5
+        self.s = Series(tm.makeStringIndex(N))
+        repeat = {'int': 1, 'array': np.random.randint(1, 3, N)}
+        self.repeat = repeat[repeats]
+
+    def time_repeat(self, repeats):
+        self.s.str.repeat(self.repeat)
+
+
+class Contains(object):
+
+    goal_time = 0.2
+    params = [True, False]
+    param_names = ['regex']
+
+    def setup(self, regex):
+        self.s = Series(tm.makeStringIndex(10**5))
+
+    def time_contains(self, regex):
+        self.s.str.contains('A', regex=regex)
+
+
+class Split(object):
+
+    goal_time = 0.2
+    params = [True, False]
+    param_names = ['expand']
+
+    def setup(self, expand):
+        self.s = Series(tm.makeStringIndex(10**5)).str.join('--')
+
+    def time_split(self, expand):
+        self.s.str.split('--', expand=expand)
+
+
+class Dummies(object):
+
+    goal_time = 0.2
+
+    def setup(self):
+        self.s = Series(tm.makeStringIndex(10**5)).str.join('|')
 
     def time_get_dummies(self):
         self.s.str.get_dummies('|')
 
 
-class StringEncode(object):
+class Encode(object):
+
     goal_time = 0.2
 
     def setup(self):
-        self.ser = Series(testing.makeUnicodeIndex())
+        self.ser = Series(tm.makeUnicodeIndex())
 
     def time_encode_decode(self):
         self.ser.str.encode('utf-8').str.decode('utf-8')
 
 
-class StringSlice(object):
+class Slice(object):
 
     goal_time = 0.2
 
     def setup(self):
         self.s = Series(['abcdefg', np.nan] * 500000)
 
-    def time_series_string_vector_slice(self):
+    def time_vector_slice(self):
         # GH 2602
         self.s.str[:5]
diff --git a/ci/lint.sh b/ci/lint.sh
@@ -24,7 +24,7 @@ if [ "$LINT" ]; then
     echo "Linting setup.py DONE"
 
     echo "Linting asv_bench/benchmarks/"
-    flake8 asv_bench/benchmarks/  --exclude=asv_bench/benchmarks/[ps]*.py --ignore=F811
+    flake8 asv_bench/benchmarks/  --exclude=asv_bench/benchmarks/*.py --ignore=F811
     if [ $? -ne "0" ]; then
         RET=1
     fi