|
8 | 8 | import pandas.core.common as com
|
9 | 9 | import pandas._tseries as lib
|
10 | 10 |
|
11 |
| -def match(values, index): |
| 11 | +def match(to_match, values, na_sentinel=-1): |
12 | 12 | """
|
13 |
| -
|
| 13 | + Compute the position in values of each element of to_match |
14 | 14 |
|
15 | 15 | Parameters
|
16 | 16 | ----------
|
| 17 | + to_match : array-like |
| 18 | + values to find positions of |
| 19 | + values : array-like |
| 20 | + Unique set of values |
| 21 | + na_sentinel : int, default -1 |
| 22 | + Value to mark "not found" |
| 23 | +
|
| 24 | + Examples |
| 25 | + -------- |
17 | 26 |
|
18 | 27 | Returns
|
19 | 28 | -------
|
20 |
| - match : ndarray |
| 29 | + match : ndarray of integers |
21 | 30 | """
|
22 |
| - f = lambda htype, caster: _match_generic(values, index, htype, caster) |
23 |
| - return _hashtable_algo(f, index.dtype) |
| 31 | + values = np.asarray(values) |
| 32 | + if issubclass(values.dtype.type, basestring): |
| 33 | + values = np.array(values, dtype='O') |
| 34 | + |
| 35 | + f = lambda htype, caster: _match_generic(to_match, values, htype, caster) |
| 36 | + return _hashtable_algo(f, values.dtype) |
24 | 37 |
|
25 | 38 | def unique(values):
|
26 | 39 | """
|
| 40 | + Compute unique values (not necessarily sorted) efficiently from input array |
| 41 | + of values |
27 | 42 |
|
28 | 43 | Parameters
|
29 | 44 | ----------
|
| 45 | + values : array-like |
30 | 46 |
|
31 | 47 | Returns
|
32 | 48 | -------
|
33 |
| -
|
| 49 | + uniques |
34 | 50 | """
|
35 | 51 | f = lambda htype, caster: _unique_generic(values, htype, caster)
|
36 | 52 | return _hashtable_algo(f, values.dtype)
|
@@ -98,7 +114,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
|
98 | 114 | labels, counts = table.get_labels(values, uniques, 0, na_sentinel)
|
99 | 115 |
|
100 | 116 | labels = com._ensure_platform_int(labels)
|
101 |
| - |
| 117 | + |
102 | 118 | uniques = com._asarray_tuplesafe(uniques)
|
103 | 119 | if sort and len(counts) > 0:
|
104 | 120 | sorter = uniques.argsort()
|
|
0 commit comments