|
| 1 | +""" |
| 2 | +Generic data algorithms |
| 3 | +""" |
| 4 | + |
| 5 | +import numpy as np |
| 6 | + |
| 7 | +from pandas.core.series import Series |
| 8 | +import pandas.core.common as com |
| 9 | +import pandas._tseries as lib |
| 10 | + |
def match(values, index):
    """
    Look up `values` in a hash table built from `index`.

    The hash table class and the dtype both inputs are cast to are chosen
    from the dtype of `index` alone: float64 for float dtypes, int64 for
    integer dtypes, and object for anything else.

    Parameters
    ----------
    values : array exposing ``dtype`` / ``astype`` (e.g. ndarray)
        Elements to look up.
    index : array exposing ``dtype`` / ``astype`` (e.g. ndarray)
        Elements whose locations are mapped into the hash table.

    Returns
    -------
    match : ndarray
        Whatever the hash table's ``lookup`` returns for `values`.
    """
    # Only the dtype of `index` drives the dispatch; `values` is cast to
    # the same type inside _match_generic.
    if com.is_float_dtype(index):
        table_type, caster = lib.Float64HashTable, _ensure_float64
    elif com.is_integer_dtype(index):
        table_type, caster = lib.Int64HashTable, _ensure_int64
    else:
        table_type, caster = lib.PyObjectHashTable, _ensure_object

    return _match_generic(values, index, table_type, caster)
| 31 | + |
| 32 | + |
def count(values, uniques=None):
    """
    Build a Series of counts for `values` using a dtype-specific hash table.

    Parameters
    ----------
    values : array exposing ``dtype`` / ``astype`` (e.g. ndarray)
        Data to be counted. Its dtype selects the hash table: float64 for
        float dtypes, int64 for integer dtypes, object otherwise.
    uniques : optional
        Not supported yet; must be left as None.

    Returns
    -------
    Series
        The counts reported by the hash table's ``factorize``, indexed by
        the unique values it reports.

    Raises
    ------
    NotImplementedError
        If `uniques` is not None.
    """
    # Counting against a caller-supplied set of uniques is not implemented.
    if uniques is not None:
        raise NotImplementedError

    if com.is_float_dtype(values):
        table_type, caster = lib.Float64HashTable, _ensure_float64
    elif com.is_integer_dtype(values):
        table_type, caster = lib.Int64HashTable, _ensure_int64
    else:
        table_type, caster = lib.PyObjectHashTable, _ensure_object

    return _count_generic(values, table_type, caster)
| 46 | + |
| 47 | +def _count_generic(values, table_type, type_caster): |
| 48 | + values = type_caster(values) |
| 49 | + table = table_type(len(values)) |
| 50 | + uniques, labels, counts = table.factorize(values) |
| 51 | + |
| 52 | + return Series(counts, index=uniques) |
| 53 | + |
| 54 | +def _match_generic(values, index, table_type, type_caster): |
| 55 | + values = type_caster(values) |
| 56 | + index = type_caster(index) |
| 57 | + table = table_type(len(index)) |
| 58 | + table.map_locations(index) |
| 59 | + return table.lookup(values) |
| 60 | + |
def factorize(values):
    """
    Placeholder: not implemented yet.

    Accepts `values`, does nothing with it, and returns None.
    """
    return None
| 63 | + |
def unique(values):
    """
    Placeholder: not implemented yet.

    Accepts `values`, does nothing with it, and returns None.
    """
    return None
| 66 | + |
| 67 | +def _ensure_float64(arr): |
| 68 | + if arr.dtype != np.float64: |
| 69 | + arr = arr.astype(np.float64) |
| 70 | + return arr |
| 71 | + |
| 72 | +def _ensure_int64(arr): |
| 73 | + if arr.dtype != np.int64: |
| 74 | + arr = arr.astype(np.int64) |
| 75 | + return arr |
| 76 | + |
| 77 | +def _ensure_object(arr): |
| 78 | + if arr.dtype != np.object_: |
| 79 | + arr = arr.astype('O') |
| 80 | + return arr |
0 commit comments