Skip to content

Commit c8baf30

Browse files
committed
CLN: ASV frame_ctor benchmarks
1 parent 38f41e6 commit c8baf30

File tree

2 files changed

+41
-38
lines changed

2 files changed

+41
-38
lines changed

asv_bench/benchmarks/frame_ctor.py

+27-38
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from .pandas_vb_common import *
1+
import numpy as np
2+
import pandas.util.testing as tm
3+
from pandas import DataFrame, Series, MultiIndex
4+
from pandas import *
25
try:
36
from pandas.tseries.offsets import *
47
except:
@@ -9,24 +12,23 @@
912
# Creation from nested dict
1013

1114
class FromDicts(object):
15+
1216
goal_time = 0.2
1317

1418
def setup(self):
15-
(N, K) = (5000, 50)
19+
np.random.seed(1234)
20+
N, K = 5000, 50
1621
self.index = tm.makeStringIndex(N)
1722
self.columns = tm.makeStringIndex(K)
18-
self.frame = DataFrame(np.random.randn(N, K), index=self.index, columns=self.columns)
19-
try:
20-
self.data = self.frame.to_dict()
21-
except:
22-
self.data = self.frame.toDict()
23+
self.frame = DataFrame(np.random.randn(N, K),
24+
index=self.index,
25+
columns=self.columns)
26+
self.data = self.frame.to_dict()
2327
self.some_dict = list(self.data.values())[0]
24-
self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values]
25-
28+
self.dict_list = self.frame.to_dict(orient='records')
2629
self.data2 = {i: {j: float(j) for j in range(100)}
2730
for i in range(2000)}
2831

29-
3032
def time_frame_ctor_list_of_dict(self):
3133
DataFrame(self.dict_list)
3234

@@ -38,38 +40,21 @@ def time_series_ctor_from_dict(self):
3840

3941
def time_frame_ctor_nested_dict_int64(self):
4042
# nested dict, integer indexes, regression described in #621
41-
DataFrame(self.data)
43+
DataFrame(self.data2)
4244

4345

4446
# from a mi-series
4547

46-
class frame_from_series(object):
48+
class FromSeries(object):
4749
goal_time = 0.2
4850

4951
def setup(self):
50-
self.mi = MultiIndex.from_tuples([(x, y) for x in range(100) for y in range(100)])
51-
self.s = Series(randn(10000), index=self.mi)
52+
self.mi = MultiIndex.from_product([range(100), range(100)])
53+
self.s = Series(np.random.randn(10000), index=self.mi)
5254

5355
def time_frame_from_mi_series(self):
5456
DataFrame(self.s)
5557

56-
57-
#----------------------------------------------------------------------
58-
# get_numeric_data
59-
60-
class frame_get_numeric_data(object):
61-
goal_time = 0.2
62-
63-
def setup(self):
64-
self.df = DataFrame(randn(10000, 25))
65-
self.df['foo'] = 'bar'
66-
self.df['bar'] = 'baz'
67-
self.df = self.df.consolidate()
68-
69-
def time_frame_get_numeric_data(self):
70-
self.df._get_numeric_data()
71-
72-
7358
# ----------------------------------------------------------------------
7459
# From dict with DatetimeIndex with all offsets
7560

@@ -84,13 +69,15 @@ def get_period_count(start_date, off):
8469
if (ten_offsets_in_days == 0):
8570
return 1000
8671
else:
87-
return min((9 * ((Timestamp.max - start_date).days // ten_offsets_in_days)), 1000)
72+
periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days
73+
return min(periods, 1000)
8874

8975

9076
def get_index_for_offset(off):
9177
start_date = Timestamp('1/1/1900')
92-
return date_range(start_date, periods=min(1000, get_period_count(
93-
start_date, off)), freq=off)
78+
return date_range(start_date,
79+
periods=get_period_count(start_date, off),
80+
freq=off)
9481

9582

9683
all_offsets = offsets.__all__
@@ -100,21 +87,23 @@ def get_index_for_offset(off):
10087
all_offsets.extend([off + '_1', off + '_2'])
10188

10289

103-
class FrameConstructorDTIndexFromOffsets(object):
90+
class FromDictwithTimestampOffsets(object):
10491

10592
params = [all_offsets, [1, 2]]
10693
param_names = ['offset', 'n_steps']
10794

10895
offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
10996
'LastWeekOfMonth': {'weekday': 1, 'week': 1},
11097
'FY5253': {'startingMonth': 1, 'weekday': 1},
111-
'FY5253Quarter': {'qtr_with_extra_week': 1, 'startingMonth': 1, 'weekday': 1}}
98+
'FY5253Quarter': {'qtr_with_extra_week': 1,
99+
'startingMonth': 1,
100+
'weekday': 1}}
112101

113102
offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
114103
'FY5253Quarter': {'variation': ['nearest', 'last']}}
115104

116105
def setup(self, offset, n_steps):
117-
106+
np.random.seed(1234)
118107
extra = False
119108
if offset.endswith("_", None, -1):
120109
extra = int(offset[-1])
@@ -132,7 +121,7 @@ def setup(self, offset, n_steps):
132121
offset = getattr(offsets, offset)
133122
self.idx = get_index_for_offset(offset(n_steps, **kwargs))
134123
self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
135-
self.d = dict(self.df.items())
124+
self.d = self.df.to_dict()
136125

137126
def time_frame_ctor(self, offset, n_steps):
138127
DataFrame(self.d)

asv_bench/benchmarks/frame_methods.py

+14
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
from .pandas_vb_common import *
22
import string
33

4+
#----------------------------------------------------------------------
5+
# get_numeric_data
6+
7+
class frame_get_numeric_data(object):
8+
goal_time = 0.2
9+
10+
def setup(self):
11+
self.df = DataFrame(np.random.randn(10000, 25))
12+
self.df['foo'] = 'bar'
13+
self.df['bar'] = 'baz'
14+
self.df = self.df.consolidate()
15+
16+
def time_frame_get_numeric_data(self):
17+
self.df._get_numeric_data()
418

519
#----------------------------------------------------------------------
620
# lookup

0 commit comments

Comments
 (0)