Skip to content

Commit b81baf5

Browse files
committed
Address comments
1 parent 0c7c6d6 commit b81baf5

File tree

2 files changed

+46
-53
lines changed

2 files changed

+46
-53
lines changed

asv_bench/benchmarks/groupby.py

+26 -53
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@ def time_series_nth(self, df):
159159
df[1].groupby(df[0]).nth(0)
160160

161161

162-
class Incidies(object):
162+
class DateAttributes(object):
163163

164164
goal_time = 0.2
165165

@@ -168,8 +168,8 @@ def setup(self):
168168
self.year, self.month, self.day = rng.year, rng.month, rng.day
169169
self.ts = Series(np.random.randn(len(rng)), index=rng)
170170

171-
def time_datetime_indicies(self):
172-
self.ts.groupby([self.year, self.month, self.day])
171+
def time_len_groupby_object(self):
172+
len(self.ts.groupby([self.year, self.month, self.day]))
173173

174174

175175
class Int64(object):
@@ -194,7 +194,7 @@ class CountMultiDtype(object):
194194

195195
goal_time = 0.2
196196

197-
def setup(self):
197+
def setup_cache(self):
198198
n = 10000
199199
offsets = np.random.randint(n, size=n).astype('timedelta64[ns]')
200200
dates = np.datetime64('now') + offsets
@@ -203,18 +203,19 @@ def setup(self):
203203
value2 = np.random.randn(n)
204204
value2[np.random.rand(n) > 0.5] = np.nan
205205
obj = np.random.choice(list('ab'), size=n).astype(object)
206-
obj[(np.random.randn(n) > 0.5)] = np.nan
207-
self.df = DataFrame({'key1': np.random.randint(0, 500, size=n),
208-
'key2': np.random.randint(0, 100, size=n),
209-
'dates': dates,
210-
'value2': value2,
211-
'value3': np.random.randn(n),
212-
'ints': np.random.randint(0, 1000, size=n),
213-
'obj': obj,
214-
'offsets': offsets})
206+
obj[np.random.randn(n) > 0.5] = np.nan
207+
df = DataFrame({'key1': np.random.randint(0, 500, size=n),
208+
'key2': np.random.randint(0, 100, size=n),
209+
'dates': dates,
210+
'value2': value2,
211+
'value3': np.random.randn(n),
212+
'ints': np.random.randint(0, 1000, size=n),
213+
'obj': obj,
214+
'offsets': offsets})
215+
return df
215216

216-
def time_multi_count(self):
217-
self.df.groupby(['key1', 'key2']).count()
217+
def time_multi_count(self, df):
218+
df.groupby(['key1', 'key2']).count()
218219

219220

220221
class CountInt(object):
@@ -236,7 +237,7 @@ def time_int_nunique(self, df):
236237
df.groupby(['key1', 'key2']).nunique()
237238

238239

239-
class AggMultiColFuncs(object):
240+
class AggFunctions(object):
240241

241242
goal_time = 0.2
242243

@@ -261,22 +262,11 @@ def time_different_numpy_functions(self, df):
261262
'value2': np.var,
262263
'value3': np.sum})
263264

265+
def time_different_python_functions_multicol(self, df):
266+
df.groupby(['key1', 'key2']).agg([sum, min, max])
264267

265-
class AggBuiltins(object):
266-
267-
goal_time = 0.2
268-
269-
def setup_cache(self):
270-
n = 10**5
271-
df = DataFrame(np.random.randint(1, n / 100, (n, 3)),
272-
columns=['jim', 'joe', 'jolie'])
273-
return df
274-
275-
def time_agg_builtin_single_col(self, df):
276-
df.groupby('jim').agg([sum, min, max])
277-
278-
def time_agg_builtins_multi_col(self, df):
279-
df.groupby(['jim', 'joe']).agg([sum, min, max])
268+
def time_different_python_functions_singlecol(self, df):
269+
df.groupby('key1').agg([sum, min, max])
280270

281271

282272
class GroupStrings(object):
@@ -532,38 +522,21 @@ def time_groupby_extra_cat_nosort(self):
532522
class Datelike(object):
533523
# GH 14338
534524
goal_time = 0.2
535-
params = [period_range, date_range, partial(date_range, tz='US/Central')]
525+
params = ['period_range', 'date_range', 'date_range_tz']
536526
param_names = ['grouper']
537527

538528
def setup(self, grouper):
539529
N = 10**4
540-
self.grouper = grouper('1900-01-01', freq='D', periods=N)
530+
rng_map = {'period_range': period_range,
531+
'date_range': date_range,
532+
'date_range_tz': partial(date_range, tz='US/Central')}
533+
self.grouper = rng_map[grouper]('1900-01-01', freq='D', periods=N)
541534
self.df = DataFrame(np.random.randn(10**4, 2))
542535

543536
def time_sum(self, grouper):
544537
self.df.groupby(self.grouper).sum()
545538

546539

547-
class PivotTable(object):
548-
goal_time = 0.2
549-
550-
def setup(self):
551-
N = 100000
552-
fac1 = np.array(['A', 'B', 'C'], dtype='O')
553-
fac2 = np.array(['one', 'two'], dtype='O')
554-
ind1 = np.random.randint(0, 3, size=N)
555-
ind2 = np.random.randint(0, 2, size=N)
556-
self.df = DataFrame({'key1': fac1.take(ind1),
557-
'key2': fac2.take(ind2),
558-
'key3': fac2.take(ind2),
559-
'value1': np.random.randn(N),
560-
'value2': np.random.randn(N),
561-
'value3': np.random.randn(N)})
562-
563-
def time_pivot_table(self):
564-
self.df.pivot_table(index='key1', columns=['key2', 'key3'])
565-
566-
567540
class SumBools(object):
568541
# GH 2692
569542
goal_time = 0.2

asv_bench/benchmarks/reshape.py

+20
Original file line number | Diff line number | Diff line change
@@ -117,3 +117,23 @@ def setup(self):
117117
def time_wide_to_long_big(self):
118118
self.df['id'] = self.df.index
119119
wide_to_long(self.df, list(self.vars), i='id', j='year')
120+
121+
122+
class PivotTable(object):
123+
goal_time = 0.2
124+
125+
def setup(self):
126+
N = 100000
127+
fac1 = np.array(['A', 'B', 'C'], dtype='O')
128+
fac2 = np.array(['one', 'two'], dtype='O')
129+
ind1 = np.random.randint(0, 3, size=N)
130+
ind2 = np.random.randint(0, 2, size=N)
131+
self.df = DataFrame({'key1': fac1.take(ind1),
132+
'key2': fac2.take(ind2),
133+
'key3': fac2.take(ind2),
134+
'value1': np.random.randn(N),
135+
'value2': np.random.randn(N),
136+
'value3': np.random.randn(N)})
137+
138+
def time_pivot_table(self):
139+
self.df.pivot_table(index='key1', columns=['key2', 'key3'])

0 commit comments

Comments
 (0)