Skip to content

Commit 2f6c1b1

Browse files
mroeschke authored and jreback committed
CLN: ASV string (#19069)
1 parent d539bdd commit 2f6c1b1

File tree

3 files changed

+96
-88
lines changed

3 files changed

+96
-88
lines changed
+15-32
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,29 @@
11
import os
2-
from pandas import *
3-
import pandas as pd
4-
from numpy.random import randn
5-
from numpy.random import randint
6-
import pandas.util.testing as tm
7-
import random
8-
import numpy as np
9-
import threading
102
from importlib import import_module
113

4+
import numpy as np
125
try:
13-
from pandas.compat import range
6+
from pandas import Panel
147
except ImportError:
15-
pass
8+
from pandas import WidePanel as Panel # noqa
9+
10+
# Compatibility import for lib
11+
for imp in ['pandas._libs.lib', 'pandas.lib']:
12+
try:
13+
lib = import_module(imp)
14+
break
15+
except:
16+
pass
1617

1718
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
1819
np.float64, np.int16, np.int8, np.uint16, np.uint8]
1920
datetime_dtypes = [np.datetime64, np.timedelta64]
2021

21-
# This function just needs to be imported into each benchmark file in order to
22-
# sets up the random seed before each function.
23-
# http://asv.readthedocs.io/en/latest/writing_benchmarks.html
22+
2423
def setup(*args, **kwargs):
24+
# This function just needs to be imported into each benchmark file to
25+
# set up the random seed before each function.
26+
# http://asv.readthedocs.io/en/latest/writing_benchmarks.html
2527
np.random.seed(1234)
2628

2729

@@ -42,22 +44,3 @@ def remove(self, f):
4244

4345
def teardown(self, *args, **kwargs):
4446
self.remove(self.fname)
45-
46-
# Compatibility import for lib
47-
for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']:
48-
try:
49-
lib = import_module(imp)
50-
break
51-
except:
52-
pass
53-
54-
try:
55-
Panel = Panel
56-
except Exception:
57-
Panel = WidePanel
58-
59-
# didn't add to namespace until later
60-
try:
61-
from pandas.core.index import MultiIndex
62-
except ImportError:
63-
pass

asv_bench/benchmarks/strings.py

+80-55
Original file line number | Diff line number | Diff line change
@@ -1,119 +1,144 @@
1-
from .pandas_vb_common import *
2-
import string
3-
import itertools as IT
4-
import pandas.util.testing as testing
1+
import numpy as np
2+
from pandas import Series
3+
import pandas.util.testing as tm
54

65

7-
class StringMethods(object):
8-
goal_time = 0.2
6+
class Methods(object):
97

10-
def make_series(self, letters, strlen, size):
11-
return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))])
8+
goal_time = 0.2
129

1310
def setup(self):
14-
self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000)
15-
self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000)
16-
self.s = self.make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|')
11+
self.s = Series(tm.makeStringIndex(10**5))
1712

1813
def time_cat(self):
19-
self.many.str.cat(sep=',')
14+
self.s.str.cat(sep=',')
2015

2116
def time_center(self):
22-
self.many.str.center(100)
23-
24-
def time_contains_few(self):
25-
self.few.str.contains('matchthis')
26-
27-
def time_contains_few_noregex(self):
28-
self.few.str.contains('matchthis', regex=False)
29-
30-
def time_contains_many(self):
31-
self.many.str.contains('matchthis')
32-
33-
def time_contains_many_noregex(self):
34-
self.many.str.contains('matchthis', regex=False)
17+
self.s.str.center(100)
3518

3619
def time_count(self):
37-
self.many.str.count('matchthis')
20+
self.s.str.count('A')
3821

3922
def time_endswith(self):
40-
self.many.str.endswith('matchthis')
23+
self.s.str.endswith('A')
4124

4225
def time_extract(self):
43-
self.many.str.extract('(\\w*)matchthis(\\w*)')
26+
self.s.str.extract('(\\w*)A(\\w*)')
4427

4528
def time_findall(self):
46-
self.many.str.findall('[A-Z]+')
29+
self.s.str.findall('[A-Z]+')
4730

4831
def time_get(self):
49-
self.many.str.get(0)
50-
51-
def time_join_split(self):
52-
self.many.str.join('--').str.split('--')
53-
54-
def time_join_split_expand(self):
55-
self.many.str.join('--').str.split('--', expand=True)
32+
self.s.str.get(0)
5633

5734
def time_len(self):
58-
self.many.str.len()
35+
self.s.str.len()
5936

6037
def time_match(self):
61-
self.many.str.match('mat..this')
38+
self.s.str.match('A')
6239

6340
def time_pad(self):
64-
self.many.str.pad(100, side='both')
65-
66-
def time_repeat(self):
67-
self.many.str.repeat(list(IT.islice(IT.cycle(range(1, 4)), len(self.many))))
41+
self.s.str.pad(100, side='both')
6842

6943
def time_replace(self):
70-
self.many.str.replace('(matchthis)', '\x01\x01')
44+
self.s.str.replace('A', '\x01\x01')
7145

7246
def time_slice(self):
73-
self.many.str.slice(5, 15, 2)
47+
self.s.str.slice(5, 15, 2)
7448

7549
def time_startswith(self):
76-
self.many.str.startswith('matchthis')
50+
self.s.str.startswith('A')
7751

7852
def time_strip(self):
79-
self.many.str.strip('matchthis')
53+
self.s.str.strip('A')
8054

8155
def time_rstrip(self):
82-
self.many.str.rstrip('matchthis')
56+
self.s.str.rstrip('A')
8357

8458
def time_lstrip(self):
85-
self.many.str.lstrip('matchthis')
59+
self.s.str.lstrip('A')
8660

8761
def time_title(self):
88-
self.many.str.title()
62+
self.s.str.title()
8963

9064
def time_upper(self):
91-
self.many.str.upper()
65+
self.s.str.upper()
9266

9367
def time_lower(self):
94-
self.many.str.lower()
68+
self.s.str.lower()
69+
70+
71+
class Repeat(object):
72+
73+
goal_time = 0.2
74+
params = ['int', 'array']
75+
param_names = ['repeats']
76+
77+
def setup(self, repeats):
78+
N = 10**5
79+
self.s = Series(tm.makeStringIndex(N))
80+
repeat = {'int': 1, 'array': np.random.randint(1, 3, N)}
81+
self.repeat = repeat[repeats]
82+
83+
def time_repeat(self, repeats):
84+
self.s.str.repeat(self.repeat)
85+
86+
87+
class Contains(object):
88+
89+
goal_time = 0.2
90+
params = [True, False]
91+
param_names = ['regex']
92+
93+
def setup(self, regex):
94+
self.s = Series(tm.makeStringIndex(10**5))
95+
96+
def time_contains(self, regex):
97+
self.s.str.contains('A', regex=regex)
98+
99+
100+
class Split(object):
101+
102+
goal_time = 0.2
103+
params = [True, False]
104+
param_names = ['expand']
105+
106+
def setup(self, expand):
107+
self.s = Series(tm.makeStringIndex(10**5)).str.join('--')
108+
109+
def time_split(self, expand):
110+
self.s.str.split('--', expand=expand)
111+
112+
113+
class Dummies(object):
114+
115+
goal_time = 0.2
116+
117+
def setup(self):
118+
self.s = Series(tm.makeStringIndex(10**5)).str.join('|')
95119

96120
def time_get_dummies(self):
97121
self.s.str.get_dummies('|')
98122

99123

100-
class StringEncode(object):
124+
class Encode(object):
125+
101126
goal_time = 0.2
102127

103128
def setup(self):
104-
self.ser = Series(testing.makeUnicodeIndex())
129+
self.ser = Series(tm.makeUnicodeIndex())
105130

106131
def time_encode_decode(self):
107132
self.ser.str.encode('utf-8').str.decode('utf-8')
108133

109134

110-
class StringSlice(object):
135+
class Slice(object):
111136

112137
goal_time = 0.2
113138

114139
def setup(self):
115140
self.s = Series(['abcdefg', np.nan] * 500000)
116141

117-
def time_series_string_vector_slice(self):
142+
def time_vector_slice(self):
118143
# GH 2602
119144
self.s.str[:5]

ci/lint.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ if [ "$LINT" ]; then
2424
echo "Linting setup.py DONE"
2525

2626
echo "Linting asv_bench/benchmarks/"
27-
flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/[ps]*.py --ignore=F811
27+
flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py --ignore=F811
2828
if [ $? -ne "0" ]; then
2929
RET=1
3030
fi

0 commit comments

Comments
 (0)