1
1
from importlib import import_module
2
2
3
3
import numpy as np
4
- import pyarrow as pa
5
4
6
5
import pandas as pd
7
6
@@ -20,9 +19,9 @@ class Factorize:
20
19
[True , False ],
21
20
[True , False ],
22
21
[
23
- "int" ,
24
- "uint" ,
25
- "float" ,
22
+ "int64" ,
23
+ "uint64" ,
24
+ "float64" ,
26
25
"object" ,
27
26
"object_str" ,
28
27
"datetime64[ns]" ,
@@ -36,28 +35,24 @@ class Factorize:
36
35
37
36
def setup (self , unique , sort , dtype ):
38
37
N = 10 ** 5
39
- string_index = tm .makeStringIndex (N )
40
- string_arrow = None
41
- if dtype == "string[pyarrow]" :
42
- try :
43
- string_arrow = pd .array (string_index , dtype = "string[pyarrow]" )
44
- except ImportError :
45
- raise NotImplementedError
46
-
47
- data = {
48
- "int" : pd .Index (np .arange (N ), dtype = "int64" ),
49
- "uint" : pd .Index (np .arange (N ), dtype = "uint64" ),
50
- "float" : pd .Index (np .random .randn (N ), dtype = "float64" ),
51
- "object_str" : string_index ,
52
- "object" : pd .Index (np .arange (N ), dtype = "object" ),
53
- "datetime64[ns]" : pd .date_range ("2011-01-01" , freq = "H" , periods = N ),
54
- "datetime64[ns, tz]" : pd .date_range (
55
- "2011-01-01" , freq = "H" , periods = N , tz = "Asia/Tokyo"
56
- ),
57
- "Int64" : pd .array (np .arange (N ), dtype = "Int64" ),
58
- "boolean" : pd .array (np .random .randint (0 , 2 , N ), dtype = "boolean" ),
59
- "string[pyarrow]" : string_arrow ,
60
- }[dtype ]
38
+
39
+ if dtype in ["int64" , "uint64" , "Int64" , "object" ]:
40
+ data = pd .Index (np .arange (N ), dtype = dtype )
41
+ elif dtype == "float64" :
42
+ data = pd .Index (np .random .randn (N ), dtype = dtype )
43
+ elif dtype == "boolean" :
44
+ data = pd .array (np .random .randint (0 , 2 , N ), dtype = dtype )
45
+ elif dtype == "datetime64[ns]" :
46
+ data = pd .date_range ("2011-01-01" , freq = "h" , periods = N )
47
+ elif dtype == "datetime64[ns, tz]" :
48
+ data = pd .date_range ("2011-01-01" , freq = "h" , periods = N , tz = "Asia/Tokyo" )
49
+ elif dtype == "object_str" :
50
+ data = tm .makeStringIndex (N )
51
+ elif dtype == "string[pyarrow]" :
52
+ data = pd .array (tm .makeStringIndex (N ), dtype = "string[pyarrow]" )
53
+ else :
54
+ raise NotImplementedError
55
+
61
56
if not unique :
62
57
data = data .repeat (5 )
63
58
self .data = data
@@ -74,9 +69,9 @@ class Duplicated:
74
69
[True , False ],
75
70
["first" , "last" , False ],
76
71
[
77
- "int" ,
78
- "uint" ,
79
- "float" ,
72
+ "int64" ,
73
+ "uint64" ,
74
+ "float64" ,
80
75
"string" ,
81
76
"datetime64[ns]" ,
82
77
"datetime64[ns, tz]" ,
@@ -88,22 +83,20 @@ class Duplicated:
88
83
89
84
def setup (self , unique , keep , dtype ):
90
85
N = 10 ** 5
91
- data = {
92
- "int" : pd .Index (np .arange (N ), dtype = "int64" ),
93
- "uint" : pd .Index (np .arange (N ), dtype = "uint64" ),
94
- "float" : pd .Index (np .random .randn (N ), dtype = "float64" ),
95
- "string" : tm .makeStringIndex (N ),
96
- "datetime64[ns]" : pd .date_range ("2011-01-01" , freq = "H" , periods = N ),
97
- "datetime64[ns, tz]" : pd .date_range (
98
- "2011-01-01" , freq = "H" , periods = N , tz = "Asia/Tokyo"
99
- ),
100
- "timestamp[ms][pyarrow]" : pd .Index (
101
- np .arange (N ), dtype = pd .ArrowDtype (pa .timestamp ("ms" ))
102
- ),
103
- "duration[s][pyarrow]" : pd .Index (
104
- np .arange (N ), dtype = pd .ArrowDtype (pa .duration ("s" ))
105
- ),
106
- }[dtype ]
86
+ if dtype in ["int64" , "uint64" ]:
87
+ data = pd .Index (np .arange (N ), dtype = dtype )
88
+ elif dtype == "float64" :
89
+ data = pd .Index (np .random .randn (N ), dtype = "float64" )
90
+ elif dtype == "string" :
91
+ data = tm .makeStringIndex (N )
92
+ elif dtype == "datetime64[ns]" :
93
+ data = pd .date_range ("2011-01-01" , freq = "h" , periods = N )
94
+ elif dtype == "datetime64[ns, tz]" :
95
+ data = pd .date_range ("2011-01-01" , freq = "h" , periods = N , tz = "Asia/Tokyo" )
96
+ elif dtype in ["timestamp[ms][pyarrow]" , "duration[s][pyarrow]" ]:
97
+ data = pd .Index (np .arange (N ), dtype = dtype )
98
+ else :
99
+ raise NotImplementedError
107
100
if not unique :
108
101
data = data .repeat (5 )
109
102
self .idx = data
@@ -181,21 +174,22 @@ class Quantile:
181
174
params = [
182
175
[0 , 0.5 , 1 ],
183
176
["linear" , "nearest" , "lower" , "higher" , "midpoint" ],
184
- ["float" , "int" , "uint" ],
177
+ ["float64" , "int64" , "uint64" ],
185
178
]
186
179
param_names = ["quantile" , "interpolation" , "dtype" ]
187
180
188
181
def setup (self , quantile , interpolation , dtype ):
189
182
N = 10 ** 5
190
- data = {
191
- "int" : np .arange (N ),
192
- "uint" : np .arange (N ).astype (np .uint64 ),
193
- "float" : np .random .randn (N ),
194
- }
195
- self .idx = pd .Series (data [dtype ].repeat (5 ))
183
+ if dtype in ["int64" , "uint64" ]:
184
+ data = np .arange (N , dtype = dtype )
185
+ elif dtype == "float64" :
186
+ data = np .random .randn (N )
187
+ else :
188
+ raise NotImplementedError
189
+ self .ser = pd .Series (data .repeat (5 ))
196
190
197
191
def time_quantile (self , quantile , interpolation , dtype ):
198
- self .idx .quantile (quantile , interpolation = interpolation )
192
+ self .ser .quantile (quantile , interpolation = interpolation )
199
193
200
194
201
195
class SortIntegerArray :
0 commit comments