|
2 | 2 | import pandas.util.testing as tm
|
3 | 3 | from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
|
4 | 4 | try:
|
5 |
| - from pandas.tseries import offsets |
6 |
| -except: |
7 |
| - from pandas.core.datetools import * |
| 5 | + from pandas.tseries.offsets import Nano, Hour |
| 6 | +except ImportError: |
| 7 | + # For compatibility with older versions
| 8 | + from pandas.core.datetools import * # noqa |
8 | 9 |
|
| 10 | +from .pandas_vb_common import setup # noqa |
9 | 11 |
|
10 |
| -# ---------------------------------------------------------------------- |
11 |
| -# Creation from nested dict |
12 | 12 |
|
13 | 13 | class FromDicts(object):
|
14 | 14 |
|
15 | 15 | goal_time = 0.2
|
16 | 16 |
|
17 | 17 | def setup(self):
|
18 |
| - np.random.seed(1234) |
19 | 18 | N, K = 5000, 50
|
20 |
| - self.index = tm.makeStringIndex(N) |
21 |
| - self.columns = tm.makeStringIndex(K) |
22 |
| - self.frame = DataFrame(np.random.randn(N, K), |
23 |
| - index=self.index, |
24 |
| - columns=self.columns) |
25 |
| - self.data = self.frame.to_dict() |
| 19 | + index = tm.makeStringIndex(N) |
| 20 | + columns = tm.makeStringIndex(K) |
| 21 | + frame = DataFrame(np.random.randn(N, K), index=index, columns=columns) |
| 22 | + self.data = frame.to_dict() |
26 | 23 | self.some_dict = list(self.data.values())[0]
|
27 |
| - self.dict_list = self.frame.to_dict(orient='records') |
| 24 | + self.dict_list = frame.to_dict(orient='records') |
28 | 25 | self.data2 = {i: {j: float(j) for j in range(100)}
|
29 | 26 | for i in range(2000)}
|
30 | 27 |
|
31 |
| - def time_frame_ctor_list_of_dict(self): |
| 28 | + def time_list_of_dict(self): |
32 | 29 | DataFrame(self.dict_list)
|
33 | 30 |
|
34 |
| - def time_frame_ctor_nested_dict(self): |
| 31 | + def time_nested_dict(self): |
35 | 32 | DataFrame(self.data)
|
36 | 33 |
|
37 |
| - def time_series_ctor_from_dict(self): |
| 34 | + def time_dict(self): |
38 | 35 | Series(self.some_dict)
|
39 | 36 |
|
40 |
| - def time_frame_ctor_nested_dict_int64(self): |
| 37 | + def time_nested_dict_int64(self): |
41 | 38 | # nested dict, integer indexes, regression described in #621
|
42 | 39 | DataFrame(self.data2)
|
43 | 40 |
|
44 | 41 |
|
45 |
| -# from a mi-series |
46 |
| - |
47 | 42 | class FromSeries(object):
|
| 43 | + |
48 | 44 | goal_time = 0.2
|
49 | 45 |
|
50 | 46 | def setup(self):
|
51 |
| - self.mi = MultiIndex.from_product([range(100), range(100)]) |
52 |
| - self.s = Series(np.random.randn(10000), index=self.mi) |
| 47 | + mi = MultiIndex.from_product([range(100), range(100)]) |
| 48 | + self.s = Series(np.random.randn(10000), index=mi) |
53 | 49 |
|
54 |
| - def time_frame_from_mi_series(self): |
| 50 | + def time_mi_series(self): |
55 | 51 | DataFrame(self.s)
|
56 | 52 |
|
57 |
| -# ---------------------------------------------------------------------- |
58 |
| -# From dict with DatetimeIndex with all offsets |
59 |
| - |
60 |
| -# dynamically generate benchmarks for every offset |
61 |
| -# |
62 |
| -# get_period_count & get_index_for_offset are there because blindly taking each |
63 |
| -# offset times 1000 can easily go out of Timestamp bounds and raise errors. |
64 |
| - |
65 |
| - |
66 |
| -def get_period_count(start_date, off): |
67 |
| - ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days |
68 |
| - if (ten_offsets_in_days == 0): |
69 |
| - return 1000 |
70 |
| - else: |
71 |
| - periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days |
72 |
| - return min(periods, 1000) |
73 |
| - |
74 | 53 |
|
75 |
| -def get_index_for_offset(off): |
76 |
| - start_date = Timestamp('1/1/1900') |
77 |
| - return date_range(start_date, |
78 |
| - periods=get_period_count(start_date, off), |
79 |
| - freq=off) |
| 54 | +class FromDictwithTimestamp(object): |
80 | 55 |
|
| 56 | + goal_time = 0.2 |
| 57 | + params = [Nano(1), Hour(1)] |
| 58 | + param_names = ['offset'] |
81 | 59 |
|
82 |
| -all_offsets = offsets.__all__ |
83 |
| -# extra cases |
84 |
| -for off in ['FY5253', 'FY5253Quarter']: |
85 |
| - all_offsets.pop(all_offsets.index(off)) |
86 |
| - all_offsets.extend([off + '_1', off + '_2']) |
| 60 | + def setup(self, offset): |
| 61 | + N = 10**3 |
| 62 | + np.random.seed(1234) |
| 63 | + idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N) |
| 64 | + df = DataFrame(np.random.randn(N, 10), index=idx) |
| 65 | + self.d = df.to_dict() |
87 | 66 |
|
| 67 | + def time_dict_with_timestamp_offsets(self, offset): |
| 68 | + DataFrame(self.d) |
88 | 69 |
|
89 |
| -class FromDictwithTimestampOffsets(object): |
90 | 70 |
|
91 |
| - params = [all_offsets, [1, 2]] |
92 |
| - param_names = ['offset', 'n_steps'] |
| 71 | +class FromRecords(object): |
93 | 72 |
|
94 |
| - offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1}, |
95 |
| - 'LastWeekOfMonth': {'weekday': 1, 'week': 1}, |
96 |
| - 'FY5253': {'startingMonth': 1, 'weekday': 1}, |
97 |
| - 'FY5253Quarter': {'qtr_with_extra_week': 1, |
98 |
| - 'startingMonth': 1, |
99 |
| - 'weekday': 1}} |
| 73 | + goal_time = 0.2 |
| 74 | + params = [None, 1000] |
| 75 | + param_names = ['nrows'] |
100 | 76 |
|
101 |
| - offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']}, |
102 |
| - 'FY5253Quarter': {'variation': ['nearest', 'last']}} |
| 77 | + def setup(self, nrows): |
| 78 | + N = 100000 |
| 79 | + self.gen = ((x, (x * 20), (x * 100)) for x in range(N)) |
103 | 80 |
|
104 |
| - def setup(self, offset, n_steps): |
105 |
| - np.random.seed(1234) |
106 |
| - extra = False |
107 |
| - if offset.endswith("_", None, -1): |
108 |
| - extra = int(offset[-1]) |
109 |
| - offset = offset[:-2] |
110 |
| - |
111 |
| - kwargs = {} |
112 |
| - if offset in self.offset_kwargs: |
113 |
| - kwargs = self.offset_kwargs[offset] |
114 |
| - |
115 |
| - if extra: |
116 |
| - extras = self.offset_extra_cases[offset] |
117 |
| - for extra_arg in extras: |
118 |
| - kwargs[extra_arg] = extras[extra_arg][extra - 1] |
119 |
| - |
120 |
| - offset = getattr(offsets, offset) |
121 |
| - self.idx = get_index_for_offset(offset(n_steps, **kwargs)) |
122 |
| - self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) |
123 |
| - self.d = self.df.to_dict() |
124 |
| - |
125 |
| - def time_frame_ctor(self, offset, n_steps): |
126 |
| - DataFrame(self.d) |
| 81 | + def time_frame_from_records_generator(self, nrows): |
| 82 | + # issue-6700 |
| 83 | + self.df = DataFrame.from_records(self.gen, nrows=nrows) |
0 commit comments