From eeeb4a1f2dad85e8e8d9552999415ff5432633ef Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 27 Oct 2019 11:41:37 -0700 Subject: [PATCH] CLN: remove algorithms.match --- pandas/core/algorithms.py | 45 +++----------------------------------- pandas/tests/test_algos.py | 35 ----------------------------- 2 files changed, 3 insertions(+), 77 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 8f72245b1f4eb..5139fdfeeb916 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -226,9 +226,8 @@ def _get_hashtable_algo(values): ------- htable : HashTable subclass values : ndarray - dtype : str or dtype """ - values, dtype, ndtype = _ensure_data(values) + values, _, ndtype = _ensure_data(values) if ndtype == "object": @@ -239,7 +238,7 @@ def _get_hashtable_algo(values): ndtype = "string" htable = _hashtables[ndtype] - return htable, values, dtype + return htable, values def _get_values_for_rank(values): @@ -271,44 +270,6 @@ def _get_data_algo(values): # --------------- # -def match(to_match, values, na_sentinel=-1): - """ - Compute locations of to_match into values - - Parameters - ---------- - to_match : array-like - values to find positions of - values : array-like - Unique set of values - na_sentinel : int, default -1 - Value to mark "not found" - - Examples - -------- - - Returns - ------- - match : ndarray of integers - """ - values = com.asarray_tuplesafe(values) - htable, values, dtype = _get_hashtable_algo(values) - to_match, _, _ = _ensure_data(to_match, dtype) - table = htable(min(len(to_match), 1000000)) - table.map_locations(values) - result = table.lookup(to_match) - - if na_sentinel != -1: - # replace but return a numpy array - # use a Series because it handles dtype conversions properly - from pandas import Series - - result = Series(result.ravel()).replace(-1, na_sentinel) - result = result.values.reshape(result.shape) - - return result - - def unique(values): """ Hash table-based unique. Uniques are returned in order @@ -395,7 +356,7 @@ def unique(values): return values.unique() original = values - htable, values, _ = _get_hashtable_algo(values) + htable, values = _get_hashtable_algo(values) table = htable(len(values)) uniques = table.unique(values) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 48cfc06f42e91..738afaea4b532 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -31,41 +31,6 @@ from pandas.util.testing import assert_almost_equal -class TestMatch: - def test_ints(self): - values = np.array([0, 2, 1]) - to_match = np.array([0, 1, 2, 2, 0, 1, 3, 0]) - - result = algos.match(to_match, values) - expected = np.array([0, 2, 1, 1, 0, 2, -1, 0], dtype=np.int64) - tm.assert_numpy_array_equal(result, expected) - - result = Series(algos.match(to_match, values, np.nan)) - expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0])) - tm.assert_series_equal(result, expected) - - s = Series(np.arange(5), dtype=np.float32) - result = algos.match(s, [2, 4]) - expected = np.array([-1, -1, 0, -1, 1], dtype=np.int64) - tm.assert_numpy_array_equal(result, expected) - - result = Series(algos.match(s, [2, 4], np.nan)) - expected = Series(np.array([np.nan, np.nan, 0, np.nan, 1])) - tm.assert_series_equal(result, expected) - - def test_strings(self): - values = ["foo", "bar", "baz"] - to_match = ["bar", "foo", "qux", "foo", "bar", "baz", "qux"] - - result = algos.match(to_match, values) - expected = np.array([1, 0, -1, 0, 1, 2, -1], dtype=np.int64) - tm.assert_numpy_array_equal(result, expected) - - result = Series(algos.match(to_match, values, np.nan)) - expected = Series(np.array([1, 0, np.nan, 0, 1, 2, np.nan])) - tm.assert_series_equal(result, expected) - - class TestFactorize: def test_basic(self):