Skip to content

Commit df54e85

Browse files
committed
ENH: beef up Float64HashTable, add algorithms.py to start collecting central algos, close #502 at some point
1 parent ae92cf8 commit df54e85

File tree

2 files changed

+180
-39
lines changed

2 files changed

+180
-39
lines changed

pandas/core/algorithms.py

+80
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,80 @@
1+
"""
2+
Generic data algorithms
3+
"""
4+
5+
import numpy as np
6+
7+
from pandas.core.series import Series
8+
import pandas.core.common as com
9+
import pandas._tseries as lib
10+
11+
def match(values, index):
    """
    Look up `values` in a hash table built from `index`.

    The hash table class and the cast applied to both inputs are picked
    from the dtype of `index`: float64, int64, or a generic object
    fallback for everything else.

    Parameters
    ----------
    values : ndarray
        Entries to look up; cast with the same caster as `index`.
    index : ndarray
        Entries the hash table is populated from.

    Returns
    -------
    match : ndarray
        Whatever the table's ``lookup`` returns for the cast `values`
        (presumably the location of each value in `index` -- confirm
        against the hash table implementation).
    """
    # Select the table/caster pair once, then make a single generic call.
    if com.is_float_dtype(index):
        table_type, caster = lib.Float64HashTable, _ensure_float64
    elif com.is_integer_dtype(index):
        table_type, caster = lib.Int64HashTable, _ensure_int64
    else:
        table_type, caster = lib.PyObjectHashTable, _ensure_object
    return _match_generic(values, index, table_type, caster)
31+
32+
33+
def count(values, uniques=None):
    """
    Count occurrences of each distinct entry in `values`.

    Parameters
    ----------
    values : ndarray
        Data to tally; its dtype selects the float64, int64 or object
        hash table.
    uniques : optional
        Not supported yet; anything other than None raises.

    Returns
    -------
    counts : Series
        Counts indexed by the distinct values, as built by
        ``_count_generic``.

    Raises
    ------
    NotImplementedError
        If `uniques` is given.
    """
    # Counting against a caller-supplied set of uniques is not written yet.
    if uniques is not None:
        raise NotImplementedError

    if com.is_float_dtype(values):
        table_type, caster = lib.Float64HashTable, _ensure_float64
    elif com.is_integer_dtype(values):
        table_type, caster = lib.Int64HashTable, _ensure_int64
    else:
        table_type, caster = lib.PyObjectHashTable, _ensure_object
    return _count_generic(values, table_type, caster)
46+
47+
def _count_generic(values, table_type, type_caster):
48+
values = type_caster(values)
49+
table = table_type(len(values))
50+
uniques, labels, counts = table.factorize(values)
51+
52+
return Series(counts, index=uniques)
53+
54+
def _match_generic(values, index, table_type, type_caster):
55+
values = type_caster(values)
56+
index = type_caster(index)
57+
table = table_type(len(index))
58+
table.map_locations(index)
59+
return table.lookup(values)
60+
61+
def factorize(values):
    """Placeholder: not implemented yet; ignores `values` and returns None."""
    return None
63+
64+
def unique(values):
    """Placeholder: not implemented yet; ignores `values` and returns None."""
    return None
66+
67+
def _ensure_float64(arr):
68+
if arr.dtype != np.float64:
69+
arr = arr.astype(np.float64)
70+
return arr
71+
72+
def _ensure_int64(arr):
73+
if arr.dtype != np.int64:
74+
arr = arr.astype(np.int64)
75+
return arr
76+
77+
def _ensure_object(arr):
78+
if arr.dtype != np.object_:
79+
arr = arr.astype('O')
80+
return arr

0 commit comments

Comments
 (0)