9
9
import pandas .lib as lib
10
10
import pandas ._algos as _algos
11
11
12
+
12
13
def match (to_match , values , na_sentinel = - 1 ):
13
14
"""
14
15
Compute locations of to_match into values
@@ -36,6 +37,7 @@ def match(to_match, values, na_sentinel=-1):
36
37
f = lambda htype , caster : _match_generic (to_match , values , htype , caster )
37
38
return _hashtable_algo (f , values .dtype )
38
39
40
+
39
41
def unique (values ):
40
42
"""
41
43
Compute unique values (not necessarily sorted) efficiently from input array
@@ -62,6 +64,7 @@ def count(values, uniques=None):
62
64
else :
63
65
return _hashtable_algo (f , values .dtype )
64
66
67
+
65
68
def _hashtable_algo (f , dtype ):
66
69
"""
67
70
f(HashTable, type_caster) -> result
@@ -83,13 +86,15 @@ def _count_generic(values, table_type, type_caster):
83
86
84
87
return Series (counts , index = uniques )
85
88
89
+
86
90
def _match_generic (values , index , table_type , type_caster ):
87
91
values = type_caster (values )
88
92
index = type_caster (index )
89
93
table = table_type (min (len (index ), 1000000 ))
90
94
table .map_locations (index )
91
95
return table .lookup (values )
92
96
97
+
93
98
def _unique_generic (values , table_type , type_caster ):
94
99
values = type_caster (values )
95
100
table = table_type (min (len (values ), 1000000 ))
@@ -138,6 +143,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
138
143
139
144
return labels , uniques , counts
140
145
146
+
141
147
def value_counts (values , sort = True , ascending = False ):
142
148
"""
143
149
Compute a histogram of the counts of non-null values
@@ -192,6 +198,7 @@ def rank(values, axis=0, method='average', na_option='keep',
192
198
ascending = ascending )
193
199
return ranks
194
200
201
+
195
202
def quantile (x , q , interpolation_method = 'fraction' ):
196
203
"""
197
204
Compute sample quantile or quantiles of the input array. For example, q=0.5
@@ -254,8 +261,8 @@ def _get_score(at):
254
261
elif interpolation_method == 'higher' :
255
262
score = values [np .ceil (idx )]
256
263
else :
257
- raise ValueError ("interpolation_method can only be 'fraction', " \
258
- "'lower' or 'higher'" )
264
+ raise ValueError ("interpolation_method can only be 'fraction' "
265
+ ", 'lower' or 'higher'" )
259
266
260
267
return score
261
268
@@ -265,11 +272,12 @@ def _get_score(at):
265
272
q = np .asarray (q , np .float64 )
266
273
return _algos .arrmap_float64 (q , _get_score )
267
274
275
+
268
276
def _interpolate (a , b , fraction ):
269
277
"""Returns the point at the given fraction between a and b, where
270
278
'fraction' must be between 0 and 1.
271
279
"""
272
- return a + (b - a )* fraction
280
+ return a + (b - a ) * fraction
273
281
274
282
275
283
def _get_data_algo (values , func_map ):
@@ -287,6 +295,7 @@ def _get_data_algo(values, func_map):
287
295
values = com ._ensure_object (values )
288
296
return f , values
289
297
298
+
290
299
def group_position (* args ):
291
300
"""
292
301
Get group position
@@ -303,19 +312,19 @@ def group_position(*args):
303
312
304
313
305
314
# Dispatch table from a dtype-name key ('float64', 'int64', 'generic') to the
# corresponding 1-D ranking routine in `lib`.
# NOTE(review): presumably passed as `func_map` to _get_data_algo -- confirm.
_rank1d_functions = {
    'float64': lib.rank_1d_float64,
    'int64': lib.rank_1d_int64,
    'generic': lib.rank_1d_generic
}
310
319
311
320
# Dispatch table from a dtype-name key ('float64', 'int64', 'generic') to the
# corresponding 2-D ranking routine in `lib`.
# NOTE(review): presumably passed as `func_map` to _get_data_algo -- confirm.
_rank2d_functions = {
    'float64': lib.rank_2d_float64,
    'int64': lib.rank_2d_int64,
    'generic': lib.rank_2d_generic
}
316
325
317
326
# Dispatch table from a dtype-name key ('float64', 'int64', 'generic') to the
# hash table class in `lib` that handles that kind of data.
# NOTE(review): presumably consulted when selecting a table type for the
# hashtable-based algorithms in this module -- confirm.
_hashtables = {
    'float64': lib.Float64HashTable,
    'int64': lib.Int64HashTable,
    'generic': lib.PyObjectHashTable
}
0 commit comments