|
1 |
| -from .pandas_vb_common import * |
2 |
| -import string |
3 |
| -import itertools as IT |
4 |
| -import pandas.util.testing as testing |
| 1 | +import numpy as np |
| 2 | +from pandas import Series |
| 3 | +import pandas.util.testing as tm |
5 | 4 |
|
6 | 5 |
|
7 |
| -class StringMethods(object): |
8 |
| - goal_time = 0.2 |
| 6 | +class Methods(object): |
9 | 7 |
|
10 |
| - def make_series(self, letters, strlen, size): |
11 |
| - return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) |
| 8 | + goal_time = 0.2 |
12 | 9 |
|
13 | 10 | def setup(self):
|
14 |
| - self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) |
15 |
| - self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) |
16 |
| - self.s = self.make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|') |
| 11 | + self.s = Series(tm.makeStringIndex(10**5)) |
17 | 12 |
|
18 | 13 | def time_cat(self):
|
19 |
| - self.many.str.cat(sep=',') |
| 14 | + self.s.str.cat(sep=',') |
20 | 15 |
|
21 | 16 | def time_center(self):
|
22 |
| - self.many.str.center(100) |
23 |
| - |
24 |
| - def time_contains_few(self): |
25 |
| - self.few.str.contains('matchthis') |
26 |
| - |
27 |
| - def time_contains_few_noregex(self): |
28 |
| - self.few.str.contains('matchthis', regex=False) |
29 |
| - |
30 |
| - def time_contains_many(self): |
31 |
| - self.many.str.contains('matchthis') |
32 |
| - |
33 |
| - def time_contains_many_noregex(self): |
34 |
| - self.many.str.contains('matchthis', regex=False) |
| 17 | + self.s.str.center(100) |
35 | 18 |
|
36 | 19 | def time_count(self):
|
37 |
| - self.many.str.count('matchthis') |
| 20 | + self.s.str.count('A') |
38 | 21 |
|
39 | 22 | def time_endswith(self):
|
40 |
| - self.many.str.endswith('matchthis') |
| 23 | + self.s.str.endswith('A') |
41 | 24 |
|
42 | 25 | def time_extract(self):
|
43 |
| - self.many.str.extract('(\\w*)matchthis(\\w*)') |
| 26 | + self.s.str.extract('(\\w*)A(\\w*)') |
44 | 27 |
|
45 | 28 | def time_findall(self):
|
46 |
| - self.many.str.findall('[A-Z]+') |
| 29 | + self.s.str.findall('[A-Z]+') |
47 | 30 |
|
48 | 31 | def time_get(self):
|
49 |
| - self.many.str.get(0) |
50 |
| - |
51 |
| - def time_join_split(self): |
52 |
| - self.many.str.join('--').str.split('--') |
53 |
| - |
54 |
| - def time_join_split_expand(self): |
55 |
| - self.many.str.join('--').str.split('--', expand=True) |
| 32 | + self.s.str.get(0) |
56 | 33 |
|
57 | 34 | def time_len(self):
|
58 |
| - self.many.str.len() |
| 35 | + self.s.str.len() |
59 | 36 |
|
60 | 37 | def time_match(self):
|
61 |
| - self.many.str.match('mat..this') |
| 38 | + self.s.str.match('A') |
62 | 39 |
|
63 | 40 | def time_pad(self):
|
64 |
| - self.many.str.pad(100, side='both') |
65 |
| - |
66 |
| - def time_repeat(self): |
67 |
| - self.many.str.repeat(list(IT.islice(IT.cycle(range(1, 4)), len(self.many)))) |
| 41 | + self.s.str.pad(100, side='both') |
68 | 42 |
|
69 | 43 | def time_replace(self):
|
70 |
| - self.many.str.replace('(matchthis)', '\x01\x01') |
| 44 | + self.s.str.replace('A', '\x01\x01') |
71 | 45 |
|
72 | 46 | def time_slice(self):
|
73 |
| - self.many.str.slice(5, 15, 2) |
| 47 | + self.s.str.slice(5, 15, 2) |
74 | 48 |
|
75 | 49 | def time_startswith(self):
|
76 |
| - self.many.str.startswith('matchthis') |
| 50 | + self.s.str.startswith('A') |
77 | 51 |
|
78 | 52 | def time_strip(self):
|
79 |
| - self.many.str.strip('matchthis') |
| 53 | + self.s.str.strip('A') |
80 | 54 |
|
81 | 55 | def time_rstrip(self):
|
82 |
| - self.many.str.rstrip('matchthis') |
| 56 | + self.s.str.rstrip('A') |
83 | 57 |
|
84 | 58 | def time_lstrip(self):
|
85 |
| - self.many.str.lstrip('matchthis') |
| 59 | + self.s.str.lstrip('A') |
86 | 60 |
|
87 | 61 | def time_title(self):
|
88 |
| - self.many.str.title() |
| 62 | + self.s.str.title() |
89 | 63 |
|
90 | 64 | def time_upper(self):
|
91 |
| - self.many.str.upper() |
| 65 | + self.s.str.upper() |
92 | 66 |
|
93 | 67 | def time_lower(self):
|
94 |
| - self.many.str.lower() |
| 68 | + self.s.str.lower() |
| 69 | + |
| 70 | + |
| 71 | +class Repeat(object): |
| 72 | + |
| 73 | + goal_time = 0.2 |
| 74 | + params = ['int', 'array'] |
| 75 | + param_names = ['repeats'] |
| 76 | + |
| 77 | + def setup(self, repeats): |
| 78 | + N = 10**5 |
| 79 | + self.s = Series(tm.makeStringIndex(N)) |
| 80 | + repeat = {'int': 1, 'array': np.random.randint(1, 3, N)} |
| 81 | + self.repeat = repeat[repeats] |
| 82 | + |
| 83 | + def time_repeat(self, repeats): |
| 84 | + self.s.str.repeat(self.repeat) |
| 85 | + |
| 86 | + |
| 87 | +class Contains(object): |
| 88 | + |
| 89 | + goal_time = 0.2 |
| 90 | + params = [True, False] |
| 91 | + param_names = ['regex'] |
| 92 | + |
| 93 | + def setup(self, regex): |
| 94 | + self.s = Series(tm.makeStringIndex(10**5)) |
| 95 | + |
| 96 | + def time_contains(self, regex): |
| 97 | + self.s.str.contains('A', regex=regex) |
| 98 | + |
| 99 | + |
| 100 | +class Split(object): |
| 101 | + |
| 102 | + goal_time = 0.2 |
| 103 | + params = [True, False] |
| 104 | + param_names = ['expand'] |
| 105 | + |
| 106 | + def setup(self, expand): |
| 107 | + self.s = Series(tm.makeStringIndex(10**5)).str.join('--') |
| 108 | + |
| 109 | + def time_split(self, expand): |
| 110 | + self.s.str.split('--', expand=expand) |
| 111 | + |
| 112 | + |
| 113 | +class Dummies(object): |
| 114 | + |
| 115 | + goal_time = 0.2 |
| 116 | + |
| 117 | + def setup(self): |
| 118 | + self.s = Series(tm.makeStringIndex(10**5)).str.join('|') |
95 | 119 |
|
96 | 120 | def time_get_dummies(self):
|
97 | 121 | self.s.str.get_dummies('|')
|
98 | 122 |
|
99 | 123 |
|
100 |
| -class StringEncode(object): |
| 124 | +class Encode(object): |
| 125 | + |
101 | 126 | goal_time = 0.2
|
102 | 127 |
|
103 | 128 | def setup(self):
|
104 |
| - self.ser = Series(testing.makeUnicodeIndex()) |
| 129 | + self.ser = Series(tm.makeUnicodeIndex()) |
105 | 130 |
|
106 | 131 | def time_encode_decode(self):
|
107 | 132 | self.ser.str.encode('utf-8').str.decode('utf-8')
|
108 | 133 |
|
109 | 134 |
|
110 |
| -class StringSlice(object): |
| 135 | +class Slice(object): |
111 | 136 |
|
112 | 137 | goal_time = 0.2
|
113 | 138 |
|
114 | 139 | def setup(self):
|
115 | 140 | self.s = Series(['abcdefg', np.nan] * 500000)
|
116 | 141 |
|
117 |
| - def time_series_string_vector_slice(self): |
| 142 | + def time_vector_slice(self): |
118 | 143 | # GH 2602
|
119 | 144 | self.s.str[:5]
|
0 commit comments