2
2
import pandas as pd
3
3
4
4
5
- class dtype_infer_datetime64 (object ):
5
+ class DtypeInfer (object ):
6
6
goal_time = 0.2
7
7
8
- def setup (self ):
9
- self .N = 500000
10
- self .df_int64 = DataFrame (dict (A = np .arange (self .N , dtype = 'int64' ), B = np .arange (self .N , dtype = 'int64' )))
11
- self .df_int32 = DataFrame (dict (A = np .arange (self .N , dtype = 'int32' ), B = np .arange (self .N , dtype = 'int32' )))
12
- self .df_uint32 = DataFrame (dict (A = np .arange (self .N , dtype = 'uint32' ), B = np .arange (self .N , dtype = 'uint32' )))
13
- self .df_float64 = DataFrame (dict (A = np .arange (self .N , dtype = 'float64' ), B = np .arange (self .N , dtype = 'float64' )))
14
- self .df_float32 = DataFrame (dict (A = np .arange (self .N , dtype = 'float32' ), B = np .arange (self .N , dtype = 'float32' )))
15
- self .df_datetime64 = DataFrame (dict (A = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' ), B = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' )))
16
- self .df_timedelta64 = DataFrame (dict (A = (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ]), B = self .df_datetime64 ['B' ]))
17
-
18
- def time_dtype_infer_datetime64 (self ):
19
- (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ])
20
-
21
-
22
- class dtype_infer_float32 (object ):
23
- goal_time = 0.2
8
+ # from GH 7332
24
9
25
10
def setup (self ):
26
11
self .N = 500000
27
- self .df_int64 = DataFrame (dict (A = np .arange (self .N , dtype = 'int64' ), B = np .arange (self .N , dtype = 'int64' )))
28
- self .df_int32 = DataFrame (dict (A = np .arange (self .N , dtype = 'int32' ), B = np .arange (self .N , dtype = 'int32' )))
29
- self .df_uint32 = DataFrame (dict (A = np .arange (self .N , dtype = 'uint32' ), B = np .arange (self .N , dtype = 'uint32' )))
30
- self .df_float64 = DataFrame (dict (A = np .arange (self .N , dtype = 'float64' ), B = np .arange (self .N , dtype = 'float64' )))
31
- self .df_float32 = DataFrame (dict (A = np .arange (self .N , dtype = 'float32' ), B = np .arange (self .N , dtype = 'float32' )))
32
- self .df_datetime64 = DataFrame (dict (A = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' ), B = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' )))
33
- self .df_timedelta64 = DataFrame (dict (A = (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ]), B = self .df_datetime64 ['B' ]))
34
-
35
- def time_dtype_infer_float32 (self ):
36
- (self .df_float32 ['A' ] + self .df_float32 ['B' ])
12
+ self .df_int64 = DataFrame (dict (A = np .arange (self .N , dtype = 'int64' ),
13
+ B = np .arange (self .N , dtype = 'int64' )))
14
+ self .df_int32 = DataFrame (dict (A = np .arange (self .N , dtype = 'int32' ),
15
+ B = np .arange (self .N , dtype = 'int32' )))
16
+ self .df_uint32 = DataFrame (dict (A = np .arange (self .N , dtype = 'uint32' ),
17
+ B = np .arange (self .N , dtype = 'uint32' )))
18
+ self .df_float64 = DataFrame (dict (A = np .arange (self .N , dtype = 'float64' ),
19
+ B = np .arange (self .N , dtype = 'float64' )))
20
+ self .df_float32 = DataFrame (dict (A = np .arange (self .N , dtype = 'float32' ),
21
+ B = np .arange (self .N , dtype = 'float32' )))
22
+ self .df_datetime64 = DataFrame (dict (A = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' ),
23
+ B = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' )))
24
+ self .df_timedelta64 = DataFrame (dict (A = (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ]),
25
+ B = self .df_datetime64 ['B' ]))
26
+
27
+ def time_int64 (self ):
28
+ (self .df_int64 ['A' ] + self .df_int64 ['B' ])
37
29
30
+ def time_int32 (self ):
31
+ (self .df_int32 ['A' ] + self .df_int32 ['B' ])
38
32
39
- class dtype_infer_float64 ( object ):
40
- goal_time = 0.2
33
+ def time_uint32 ( self ):
34
+ ( self . df_uint32 [ 'A' ] + self . df_uint32 [ 'B' ])
41
35
42
- def setup (self ):
43
- self .N = 500000
44
- self .df_int64 = DataFrame (dict (A = np .arange (self .N , dtype = 'int64' ), B = np .arange (self .N , dtype = 'int64' )))
45
- self .df_int32 = DataFrame (dict (A = np .arange (self .N , dtype = 'int32' ), B = np .arange (self .N , dtype = 'int32' )))
46
- self .df_uint32 = DataFrame (dict (A = np .arange (self .N , dtype = 'uint32' ), B = np .arange (self .N , dtype = 'uint32' )))
47
- self .df_float64 = DataFrame (dict (A = np .arange (self .N , dtype = 'float64' ), B = np .arange (self .N , dtype = 'float64' )))
48
- self .df_float32 = DataFrame (dict (A = np .arange (self .N , dtype = 'float32' ), B = np .arange (self .N , dtype = 'float32' )))
49
- self .df_datetime64 = DataFrame (dict (A = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' ), B = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' )))
50
- self .df_timedelta64 = DataFrame (dict (A = (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ]), B = self .df_datetime64 ['B' ]))
51
-
52
- def time_dtype_infer_float64 (self ):
36
+ def time_float64 (self ):
53
37
(self .df_float64 ['A' ] + self .df_float64 ['B' ])
54
38
39
+ def time_float32 (self ):
40
+ (self .df_float32 ['A' ] + self .df_float32 ['B' ])
55
41
56
- class dtype_infer_int32 (object ):
57
- goal_time = 0.2
58
-
59
- def setup (self ):
60
- self .N = 500000
61
- self .df_int64 = DataFrame (dict (A = np .arange (self .N , dtype = 'int64' ), B = np .arange (self .N , dtype = 'int64' )))
62
- self .df_int32 = DataFrame (dict (A = np .arange (self .N , dtype = 'int32' ), B = np .arange (self .N , dtype = 'int32' )))
63
- self .df_uint32 = DataFrame (dict (A = np .arange (self .N , dtype = 'uint32' ), B = np .arange (self .N , dtype = 'uint32' )))
64
- self .df_float64 = DataFrame (dict (A = np .arange (self .N , dtype = 'float64' ), B = np .arange (self .N , dtype = 'float64' )))
65
- self .df_float32 = DataFrame (dict (A = np .arange (self .N , dtype = 'float32' ), B = np .arange (self .N , dtype = 'float32' )))
66
- self .df_datetime64 = DataFrame (dict (A = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' ), B = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' )))
67
- self .df_timedelta64 = DataFrame (dict (A = (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ]), B = self .df_datetime64 ['B' ]))
68
-
69
- def time_dtype_infer_int32 (self ):
70
- (self .df_int32 ['A' ] + self .df_int32 ['B' ])
71
-
42
+ def time_datetime64 (self ):
43
+ (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ])
72
44
73
- class dtype_infer_int64 ( object ):
74
- goal_time = 0.2
45
+ def time_timedelta64_1 ( self ):
46
+ ( self . df_timedelta64 [ 'A' ] + self . df_timedelta64 [ 'B' ])
75
47
76
- def setup (self ):
77
- self .N = 500000
78
- self .df_int64 = DataFrame (dict (A = np .arange (self .N , dtype = 'int64' ), B = np .arange (self .N , dtype = 'int64' )))
79
- self .df_int32 = DataFrame (dict (A = np .arange (self .N , dtype = 'int32' ), B = np .arange (self .N , dtype = 'int32' )))
80
- self .df_uint32 = DataFrame (dict (A = np .arange (self .N , dtype = 'uint32' ), B = np .arange (self .N , dtype = 'uint32' )))
81
- self .df_float64 = DataFrame (dict (A = np .arange (self .N , dtype = 'float64' ), B = np .arange (self .N , dtype = 'float64' )))
82
- self .df_float32 = DataFrame (dict (A = np .arange (self .N , dtype = 'float32' ), B = np .arange (self .N , dtype = 'float32' )))
83
- self .df_datetime64 = DataFrame (dict (A = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' ), B = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' )))
84
- self .df_timedelta64 = DataFrame (dict (A = (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ]), B = self .df_datetime64 ['B' ]))
85
-
86
- def time_dtype_infer_int64 (self ):
87
- (self .df_int64 ['A' ] + self .df_int64 ['B' ])
48
+ def time_timedelta64_2 (self ):
49
+ (self .df_timedelta64 ['A' ] + self .df_timedelta64 ['A' ])
88
50
89
51
90
- class dtype_infer_timedelta64_1 (object ):
52
+ class to_numeric (object ):
91
53
goal_time = 0.2
92
54
93
55
def setup (self ):
94
- self .N = 500000
95
- self .df_int64 = DataFrame (dict (A = np .arange (self .N , dtype = 'int64' ), B = np .arange (self .N , dtype = 'int64' )))
96
- self .df_int32 = DataFrame (dict (A = np .arange (self .N , dtype = 'int32' ), B = np .arange (self .N , dtype = 'int32' )))
97
- self .df_uint32 = DataFrame (dict (A = np .arange (self .N , dtype = 'uint32' ), B = np .arange (self .N , dtype = 'uint32' )))
98
- self .df_float64 = DataFrame (dict (A = np .arange (self .N , dtype = 'float64' ), B = np .arange (self .N , dtype = 'float64' )))
99
- self .df_float32 = DataFrame (dict (A = np .arange (self .N , dtype = 'float32' ), B = np .arange (self .N , dtype = 'float32' )))
100
- self .df_datetime64 = DataFrame (dict (A = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' ), B = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' )))
101
- self .df_timedelta64 = DataFrame (dict (A = (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ]), B = self .df_datetime64 ['B' ]))
102
-
103
- def time_dtype_infer_timedelta64_1 (self ):
104
- (self .df_timedelta64 ['A' ] + self .df_timedelta64 ['B' ])
105
-
56
+ self .n = 10000
57
+ self .float = Series (np .random .randn (self .n * 100 ))
58
+ self .numstr = self .float .astype ('str' )
59
+ self .str = Series (tm .makeStringIndex (self .n ))
106
60
107
- class dtype_infer_timedelta64_2 (object ):
108
- goal_time = 0.2
109
-
110
- def setup (self ):
111
- self .N = 500000
112
- self .df_int64 = DataFrame (dict (A = np .arange (self .N , dtype = 'int64' ), B = np .arange (self .N , dtype = 'int64' )))
113
- self .df_int32 = DataFrame (dict (A = np .arange (self .N , dtype = 'int32' ), B = np .arange (self .N , dtype = 'int32' )))
114
- self .df_uint32 = DataFrame (dict (A = np .arange (self .N , dtype = 'uint32' ), B = np .arange (self .N , dtype = 'uint32' )))
115
- self .df_float64 = DataFrame (dict (A = np .arange (self .N , dtype = 'float64' ), B = np .arange (self .N , dtype = 'float64' )))
116
- self .df_float32 = DataFrame (dict (A = np .arange (self .N , dtype = 'float32' ), B = np .arange (self .N , dtype = 'float32' )))
117
- self .df_datetime64 = DataFrame (dict (A = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' ), B = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' )))
118
- self .df_timedelta64 = DataFrame (dict (A = (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ]), B = self .df_datetime64 ['B' ]))
119
-
120
- def time_dtype_infer_timedelta64_2 (self ):
121
- (self .df_timedelta64 ['A' ] + self .df_timedelta64 ['A' ])
61
+ def time_from_float (self ):
62
+ pd .to_numeric (self .float )
122
63
64
+ def time_from_numeric_str (self ):
65
+ pd .to_numeric (self .numstr )
123
66
124
- class dtype_infer_uint32 ( object ):
125
- goal_time = 0.2
67
+ def time_from_str_ignore ( self ):
68
+ pd . to_numeric ( self . str , errors = 'ignore' )
126
69
127
- def setup (self ):
128
- self .N = 500000
129
- self .df_int64 = DataFrame (dict (A = np .arange (self .N , dtype = 'int64' ), B = np .arange (self .N , dtype = 'int64' )))
130
- self .df_int32 = DataFrame (dict (A = np .arange (self .N , dtype = 'int32' ), B = np .arange (self .N , dtype = 'int32' )))
131
- self .df_uint32 = DataFrame (dict (A = np .arange (self .N , dtype = 'uint32' ), B = np .arange (self .N , dtype = 'uint32' )))
132
- self .df_float64 = DataFrame (dict (A = np .arange (self .N , dtype = 'float64' ), B = np .arange (self .N , dtype = 'float64' )))
133
- self .df_float32 = DataFrame (dict (A = np .arange (self .N , dtype = 'float32' ), B = np .arange (self .N , dtype = 'float32' )))
134
- self .df_datetime64 = DataFrame (dict (A = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' ), B = pd .to_datetime (np .arange (self .N , dtype = 'int64' ), unit = 'ms' )))
135
- self .df_timedelta64 = DataFrame (dict (A = (self .df_datetime64 ['A' ] - self .df_datetime64 ['B' ]), B = self .df_datetime64 ['B' ]))
136
-
137
- def time_dtype_infer_uint32 (self ):
138
- (self .df_uint32 ['A' ] + self .df_uint32 ['B' ])
70
+ def time_from_str_coerce (self ):
71
+ pd .to_numeric (self .str , errors = 'coerce' )
139
72
140
73
141
- class to_numeric (object ):
74
+ class to_numeric_downcast (object ):
142
75
143
76
param_names = ['dtype' , 'downcast' ]
144
77
params = [['string-float' , 'string-int' , 'string-nint' , 'datetime64' ,
@@ -162,4 +95,4 @@ def setup(self, dtype, downcast):
162
95
self .data = self .data_dict [dtype ]
163
96
164
97
def time_downcast (self , dtype , downcast ):
165
- pd .to_numeric (self .data , downcast = downcast )
98
+ pd .to_numeric (self .data , downcast = downcast )
0 commit comments