1
- from .pandas_vb_common import *
2
- import pandas as pd
1
+ import numpy as np
2
+ import pandas .util .testing as tm
3
+ from pandas import DataFrame , Series , to_numeric
3
4
5
+ from .pandas_vb_common import numeric_dtypes , lib , setup # noqa
4
6
5
- class DtypeInfer (object ):
6
- goal_time = 0.2
7
7
8
class NumericInferOps(object):
    """Time binary arithmetic between two equally-typed DataFrame columns.

    Every ``time_*`` method applies one operator to columns ``A`` and ``B``
    of the frame built in ``setup`` and discards the result. The class is
    parametrised over ``numeric_dtypes``; each parameter value is used as
    the dtype both columns are cast to.
    """
    # from GH 7332
    goal_time = 0.2
    params = numeric_dtypes
    param_names = ['dtype']

    def setup(self, dtype):
        """Build ``self.df`` with columns ``A`` and ``B`` cast to ``dtype``.

        Both columns hold the integers ``0 .. 5 * 10 ** 5 - 1`` before the
        cast.
        """
        n_rows = 5 * 10 ** 5
        columns = {}
        for label in ('A', 'B'):
            columns[label] = np.arange(n_rows).astype(dtype)
        self.df = DataFrame(columns)

    def time_add(self, dtype):
        """Evaluate ``A + B``."""
        frame = self.df
        frame['A'] + frame['B']

    def time_subtract(self, dtype):
        """Evaluate ``A - B``."""
        frame = self.df
        frame['A'] - frame['B']

    def time_multiply(self, dtype):
        """Evaluate ``A * B``."""
        frame = self.df
        frame['A'] * frame['B']

    def time_divide(self, dtype):
        """Evaluate ``A / B``."""
        frame = self.df
        frame['A'] / frame['B']

    def time_modulo(self, dtype):
        """Evaluate ``A % B``."""
        frame = self.df
        frame['A'] % frame['B']
35
class DateInferOps(object):
    """Time arithmetic between datetime64 and timedelta columns.

    ``setup_cache`` builds a frame with a ``datetime64`` column and a
    ``timedelta`` column. Each ``time_*`` method takes such a frame as
    ``df`` (presumably the cached frame, supplied by the benchmark
    runner), evaluates one operation on it and discards the result.
    """
    # from GH 7332
    goal_time = 0.2

    def setup_cache(self):
        """Return a frame with ``datetime64`` and ``timedelta`` columns.

        The ``datetime64`` column holds ``0 .. 5 * 10 ** 5 - 1`` cast to
        ``datetime64[ms]``; ``timedelta`` is that column minus itself.
        """
        N = 5 * 10 ** 5
        df = DataFrame({'datetime64': np.arange(N).astype('datetime64[ms]')})
        df['timedelta'] = df['datetime64'] - df['datetime64']
        return df

    # The column labels below must match the ones created in
    # ``setup_cache`` exactly; a label with stray whitespace is a different
    # key and raises ``KeyError``.

    def time_subtract_datetimes(self, df):
        """Evaluate ``datetime64 - datetime64``."""
        df['datetime64'] - df['datetime64']

    def time_timedelta_plus_datetime(self, df):
        """Evaluate ``timedelta + datetime64``."""
        df['timedelta'] + df['datetime64']

    def time_add_timedeltas(self, df):
        """Evaluate ``timedelta + timedelta``."""
        df['timedelta'] + df['timedelta']
50
54
55
class ToNumeric(object):
    """Time ``to_numeric`` on float, numeric-string and plain-string Series.

    The class is parametrised over the ``errors`` argument, which is
    forwarded unchanged to every ``to_numeric`` call.
    """

    goal_time = 0.2
    params = ['ignore', 'coerce']
    param_names = ['errors']

    def setup(self, errors):
        """Create the three input Series, each of length 10000.

        ``self.float`` holds random normal floats, ``self.numstr`` holds
        the same values cast to ``'str'``, and ``self.str`` wraps the index
        returned by ``tm.makeStringIndex``.
        """
        size = 10000
        floats = Series(np.random.randn(size))
        self.float = floats
        self.numstr = floats.astype('str')
        self.str = Series(tm.makeStringIndex(size))

    def time_from_float(self, errors):
        """Convert the float Series."""
        values = self.float
        to_numeric(values, errors=errors)

    def time_from_numeric_str(self, errors):
        """Convert the Series of stringified floats."""
        values = self.numstr
        to_numeric(values, errors=errors)

    def time_from_str(self, errors):
        """Convert the Series built from ``tm.makeStringIndex``."""
        values = self.str
        to_numeric(values, errors=errors)
72
76
73
-
74
- class to_numeric_downcast (object ):
77
+ class ToNumericDowncast (object ):
75
78
76
79
param_names = ['dtype' , 'downcast' ]
77
80
params = [['string-float' , 'string-int' , 'string-nint' , 'datetime64' ,
@@ -81,37 +84,30 @@ class to_numeric_downcast(object):
81
84
N = 500000
82
85
N2 = int (N / 2 )
83
86
84
- data_dict = {
85
- 'string-int' : (['1' ] * N2 ) + ([2 ] * N2 ),
86
- 'string-nint' : (['-1' ] * N2 ) + ([2 ] * N2 ),
87
- 'datetime64' : np .repeat (np .array (['1970-01-01' , '1970-01-02' ],
88
- dtype = 'datetime64[D]' ), N ),
89
- 'string-float' : (['1.1' ] * N2 ) + ([2 ] * N2 ),
90
- 'int-list' : ([1 ] * N2 ) + ([2 ] * N2 ),
91
- 'int32' : np .repeat (np .int32 (1 ), N )
92
- }
87
+ data_dict = {'string-int' : ['1' ] * N2 + [2 ] * N2 ,
88
+ 'string-nint' : ['-1' ] * N2 + [2 ] * N2 ,
89
+ 'datetime64' : np .repeat (np .array (['1970-01-01' , '1970-01-02' ],
90
+ dtype = 'datetime64[D]' ), N ),
91
+ 'string-float' : ['1.1' ] * N2 + [2 ] * N2 ,
92
+ 'int-list' : [1 ] * N2 + [2 ] * N2 ,
93
+ 'int32' : np .repeat (np .int32 (1 ), N )}
93
94
94
95
def setup (self , dtype , downcast ):
95
96
self .data = self .data_dict [dtype ]
96
97
97
98
def time_downcast (self , dtype , downcast ):
98
- pd . to_numeric (self .data , downcast = downcast )
99
+ to_numeric (self .data , downcast = downcast )
99
100
100
101
101
102
class MaybeConvertNumeric(object):
    """Time ``lib.maybe_convert_numeric`` on a mixed object array.

    The input alternates between integers at or above ``2 ** 63`` and the
    string form of such integers, and ends with ``-1``.
    """

    def setup_cache(self):
        """Build and return the object array passed to ``time_convert``."""
        size = 10 ** 6
        base = np.repeat([2 ** 63], size)
        numbers = base + np.arange(size).astype('uint64')
        mixed = numbers.astype(object)
        # Every odd position is replaced by the string form of its value.
        mixed[1::2] = numbers[1::2].astype(str)
        mixed[-1] = -1
        return mixed

    def time_convert(self, data):
        """Convert ``data`` with an empty NA set and ``coerce_numeric=False``."""
        na_values = set()
        lib.maybe_convert_numeric(data, na_values, coerce_numeric=False)
0 commit comments