Skip to content

Commit 1c1f507

Browse files
mroeschke authored and jorisvandenbossche committed
CLN: ASV FromDictwithTimestamp (#18527)
1 parent 451811d commit 1c1f507

File tree

1 file changed

+19
-72
lines changed

1 file changed

+19
-72
lines changed

asv_bench/benchmarks/frame_ctor.py

+19-72
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
import pandas.util.testing as tm
33
from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
44
try:
5-
from pandas.tseries import offsets
6-
except:
5+
from pandas.tseries.offsets import Nano, Hour
6+
except ImportError:
7+
# For compatibility with older versions
78
from pandas.core.datetools import * # noqa
89

910
from .pandas_vb_common import setup # noqa
@@ -24,16 +25,16 @@ def setup(self):
2425
self.data2 = {i: {j: float(j) for j in range(100)}
2526
for i in range(2000)}
2627

27-
def time_frame_ctor_list_of_dict(self):
28+
def time_list_of_dict(self):
2829
DataFrame(self.dict_list)
2930

30-
def time_frame_ctor_nested_dict(self):
31+
def time_nested_dict(self):
3132
DataFrame(self.data)
3233

33-
def time_series_ctor_from_dict(self):
34+
def time_dict(self):
3435
Series(self.some_dict)
3536

36-
def time_frame_ctor_nested_dict_int64(self):
37+
def time_nested_dict_int64(self):
3738
# nested dict, integer indexes, regression described in #621
3839
DataFrame(self.data2)
3940

@@ -46,78 +47,24 @@ def setup(self):
4647
mi = MultiIndex.from_product([range(100), range(100)])
4748
self.s = Series(np.random.randn(10000), index=mi)
4849

49-
def time_frame_from_mi_series(self):
50+
def time_mi_series(self):
5051
DataFrame(self.s)
5152

52-
# ----------------------------------------------------------------------
53-
# From dict with DatetimeIndex with all offsets
5453

55-
# dynamically generate benchmarks for every offset
56-
#
57-
# get_period_count & get_index_for_offset are there because blindly taking each
58-
# offset times 1000 can easily go out of Timestamp bounds and raise errors.
54+
class FromDictwithTimestamp(object):
5955

56+
goal_time = 0.2
57+
params = [Nano(1), Hour(1)]
58+
param_names = ['offset']
6059

61-
def get_period_count(start_date, off):
62-
ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days
63-
if (ten_offsets_in_days == 0):
64-
return 1000
65-
else:
66-
periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days
67-
return min(periods, 1000)
68-
69-
70-
def get_index_for_offset(off):
71-
start_date = Timestamp('1/1/1900')
72-
return date_range(start_date,
73-
periods=get_period_count(start_date, off),
74-
freq=off)
75-
76-
77-
all_offsets = offsets.__all__
78-
# extra cases
79-
for off in ['FY5253', 'FY5253Quarter']:
80-
all_offsets.pop(all_offsets.index(off))
81-
all_offsets.extend([off + '_1', off + '_2'])
82-
83-
84-
class FromDictwithTimestampOffsets(object):
85-
86-
params = [all_offsets, [1, 2]]
87-
param_names = ['offset', 'n_steps']
88-
89-
offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
90-
'LastWeekOfMonth': {'weekday': 1, 'week': 1},
91-
'FY5253': {'startingMonth': 1, 'weekday': 1},
92-
'FY5253Quarter': {'qtr_with_extra_week': 1,
93-
'startingMonth': 1,
94-
'weekday': 1}}
95-
96-
offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
97-
'FY5253Quarter': {'variation': ['nearest', 'last']}}
98-
99-
def setup(self, offset, n_steps):
60+
def setup(self, offset):
61+
N = 10**3
10062
np.random.seed(1234)
101-
extra = False
102-
if offset.endswith("_", None, -1):
103-
extra = int(offset[-1])
104-
offset = offset[:-2]
105-
106-
kwargs = {}
107-
if offset in self.offset_kwargs:
108-
kwargs = self.offset_kwargs[offset]
109-
110-
if extra:
111-
extras = self.offset_extra_cases[offset]
112-
for extra_arg in extras:
113-
kwargs[extra_arg] = extras[extra_arg][extra - 1]
114-
115-
offset = getattr(offsets, offset)
116-
self.idx = get_index_for_offset(offset(n_steps, **kwargs))
117-
self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
118-
self.d = self.df.to_dict()
119-
120-
def time_frame_ctor(self, offset, n_steps):
63+
idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N)
64+
df = DataFrame(np.random.randn(N, 10), index=idx)
65+
self.d = df.to_dict()
66+
67+
def time_dict_with_timestamp_offsets(self, offset):
12168
DataFrame(self.d)
12269

12370

0 commit comments

Comments
 (0)