forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathframe_ctor.py
141 lines (104 loc) · 4.3 KB
/
frame_ctor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import numpy as np
import pandas.util.testing as tm
from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
# Prefer the dedicated offsets module; fall back to the legacy datetools
# namespace when that import is unavailable. Only ImportError triggers the
# fallback, so unrelated failures (or Ctrl-C) are not silently swallowed.
try:
    from pandas.tseries import offsets
except ImportError:
    from pandas.core.datetools import *
# ----------------------------------------------------------------------
# Creation from nested dict
class FromDicts(object):
    """Benchmarks for building DataFrame / Series objects from dicts."""

    goal_time = 0.2

    def setup(self):
        """Prepare the dict-shaped inputs consumed by the timed methods."""
        np.random.seed(1234)
        n_rows, n_cols = 5000, 50
        self.index = tm.makeStringIndex(n_rows)
        self.columns = tm.makeStringIndex(n_cols)
        values = np.random.randn(n_rows, n_cols)
        self.frame = DataFrame(values, index=self.index, columns=self.columns)

        # Column-oriented nested dict, plus one of its inner dicts.
        self.data = self.frame.to_dict()
        self.some_dict = next(iter(self.data.values()))

        # Row-oriented list of dicts.
        self.dict_list = self.frame.to_dict(orient='records')

        # Nested dict keyed by integers on both levels; every outer key gets
        # its own inner dict object.
        nested = {}
        for outer in range(2000):
            nested[outer] = dict((inner, float(inner))
                                 for inner in range(100))
        self.data2 = nested

    def time_frame_ctor_list_of_dict(self):
        """Time DataFrame construction from a list of row dicts."""
        DataFrame(self.dict_list)

    def time_frame_ctor_nested_dict(self):
        """Time DataFrame construction from a dict of column dicts."""
        DataFrame(self.data)

    def time_series_ctor_from_dict(self):
        """Time Series construction from a flat dict."""
        Series(self.some_dict)

    def time_frame_ctor_nested_dict_int64(self):
        """Time DataFrame construction from an integer-keyed nested dict.

        Covers the regression described in #621.
        """
        DataFrame(self.data2)
# Creation from a Series indexed by a MultiIndex
class FromSeries(object):
    """Benchmark DataFrame construction from a MultiIndex-ed Series."""

    goal_time = 0.2

    def setup(self):
        """Build a 100 x 100 product MultiIndex and a random Series on it."""
        levels = [range(100), range(100)]
        self.mi = MultiIndex.from_product(levels)
        values = np.random.randn(10000)
        self.s = Series(values, index=self.mi)

    def time_frame_from_mi_series(self):
        """Time wrapping the MultiIndex-ed Series in a DataFrame."""
        DataFrame(self.s)
# ----------------------------------------------------------------------
# From dict with DatetimeIndex with all offsets
# dynamically generate benchmarks for every offset
#
# get_period_count & get_index_for_offset are there because blindly taking each
# offset times 1000 can easily go out of Timestamp bounds and raise errors.
def get_period_count(start_date, off):
ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days
if (ten_offsets_in_days == 0):
return 1000
else:
periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days
return min(periods, 1000)
def get_index_for_offset(off):
    """Return a date_range starting 1900-01-01 with frequency ``off``.

    The number of periods comes from ``get_period_count`` for the same start
    date and offset.
    """
    origin = Timestamp('1/1/1900')
    n_periods = get_period_count(origin, off)
    return date_range(origin, periods=n_periods, freq=off)
# Work on a copy: ``offsets.__all__`` is the module's own export list, and
# editing it in place would alter pandas state for the rest of the process.
all_offsets = list(offsets.__all__)
# extra cases: each of these names is replaced by two suffixed entries
# ('<name>_1' and '<name>_2'), appended at the end of the list.
for off in ['FY5253', 'FY5253Quarter']:
    all_offsets.remove(off)
    all_offsets.extend([off + '_1', off + '_2'])
class FromDictwithTimestampOffsets(object):
    """Benchmark ``DataFrame(dict)`` for frames indexed by offset-based dates.

    Parametrised over every name in ``all_offsets`` and over ``n_steps``
    (the multiple passed as the first argument to the offset constructor).
    A parameter of the form ``'<name>_<k>'`` selects the k-th entry of each
    list in ``offset_extra_cases[<name>]`` as an additional keyword argument.
    """

    params = [all_offsets, [1, 2]]
    param_names = ['offset', 'n_steps']

    # Keyword arguments passed to the constructor of the named offset.
    # NOTE(review): 'week' is also passed to LastWeekOfMonth -- confirm the
    # targeted pandas versions accept it.
    offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
                     'LastWeekOfMonth': {'weekday': 1, 'week': 1},
                     'FY5253': {'startingMonth': 1, 'weekday': 1},
                     'FY5253Quarter': {'qtr_with_extra_week': 1,
                                       'startingMonth': 1,
                                       'weekday': 1}}

    # Per-offset keyword arguments with several candidate values; the numeric
    # suffix of the parameter name (1-based) picks which value is used.
    offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
                          'FY5253Quarter': {'variation': ['nearest', 'last']}}

    def setup(self, offset, n_steps):
        """Build the dict input for one (offset, n_steps) combination.

        Parameters
        ----------
        offset : str
            Offset class name, optionally suffixed with ``_<digit>`` to
            select an extra case.
        n_steps : int
            Multiple passed to the offset constructor.
        """
        np.random.seed(1234)

        # A name whose second-to-last character is '_' carries a one-digit
        # extra-case selector; strip it to recover the offset class name.
        variant = 0
        if offset[:-1].endswith("_"):
            variant = int(offset[-1])
            offset = offset[:-2]

        # Copy the shared defaults so the extra-case keys added below never
        # leak into the class-level ``offset_kwargs`` dict.
        kwargs = dict(self.offset_kwargs.get(offset, {}))

        if variant:
            for arg_name, choices in self.offset_extra_cases[offset].items():
                kwargs[arg_name] = choices[variant - 1]

        offset_cls = getattr(offsets, offset)
        self.idx = get_index_for_offset(offset_cls(n_steps, **kwargs))
        self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
        self.d = self.df.to_dict()

    def time_frame_ctor(self, offset, n_steps):
        """Time DataFrame construction from the prepared nested dict."""
        DataFrame(self.d)
class FromRecords(object):
    """Benchmark ``DataFrame.from_records`` fed by a generator of tuples.

    Parametrised over ``nrows``, the row limit passed to ``from_records``
    (``None`` or 1000).
    """

    goal_time = 0.2
    # The generator built in ``setup`` can only be consumed once, while asv
    # calls the timed method ``number`` times per ``setup``. Pin it to one
    # call so every measurement sees a fresh, non-exhausted generator.
    number = 1
    params = [None, 1000]
    param_names = ['nrows']

    def setup(self, nrows):
        """Create a lazy generator yielding 100000 three-element tuples."""
        N = 100000
        self.gen = ((x, (x * 20), (x * 100)) for x in range(N))

    def time_frame_from_records_generator(self, nrows):
        """Time ``DataFrame.from_records`` consuming the generator."""
        # issue-6700
        self.df = DataFrame.from_records(self.gen, nrows=nrows)