Skip to content

Commit a0ac3f5

Browse files
jbrockmendel authored and proost committed
CLN: remove algorithms.match (pandas-dev#29249)
1 parent 70c2eb7 commit a0ac3f5

File tree

2 files changed

+3
-77
lines changed

2 files changed

+3
-77
lines changed

pandas/core/algorithms.py

+3 -42
Original file line number | Diff line number | Diff line change
@@ -226,9 +226,8 @@ def _get_hashtable_algo(values):
226226
-------
227227
htable : HashTable subclass
228228
values : ndarray
229-
dtype : str or dtype
230229
"""
231-
values, dtype, ndtype = _ensure_data(values)
230+
values, _, ndtype = _ensure_data(values)
232231

233232
if ndtype == "object":
234233

@@ -239,7 +238,7 @@ def _get_hashtable_algo(values):
239238
ndtype = "string"
240239

241240
htable = _hashtables[ndtype]
242-
return htable, values, dtype
241+
return htable, values
243242

244243

245244
def _get_values_for_rank(values):
@@ -271,44 +270,6 @@ def _get_data_algo(values):
271270
# --------------- #
272271

273272

274-
def match(to_match, values, na_sentinel=-1):
275-
"""
276-
Compute locations of to_match into values
277-
278-
Parameters
279-
----------
280-
to_match : array-like
281-
values to find positions of
282-
values : array-like
283-
Unique set of values
284-
na_sentinel : int, default -1
285-
Value to mark "not found"
286-
287-
Examples
288-
--------
289-
290-
Returns
291-
-------
292-
match : ndarray of integers
293-
"""
294-
values = com.asarray_tuplesafe(values)
295-
htable, values, dtype = _get_hashtable_algo(values)
296-
to_match, _, _ = _ensure_data(to_match, dtype)
297-
table = htable(min(len(to_match), 1000000))
298-
table.map_locations(values)
299-
result = table.lookup(to_match)
300-
301-
if na_sentinel != -1:
302-
# replace but return a numpy array
303-
# use a Series because it handles dtype conversions properly
304-
from pandas import Series
305-
306-
result = Series(result.ravel()).replace(-1, na_sentinel)
307-
result = result.values.reshape(result.shape)
308-
309-
return result
310-
311-
312273
def unique(values):
313274
"""
314275
Hash table-based unique. Uniques are returned in order
@@ -395,7 +356,7 @@ def unique(values):
395356
return values.unique()
396357

397358
original = values
398-
htable, values, _ = _get_hashtable_algo(values)
359+
htable, values = _get_hashtable_algo(values)
399360

400361
table = htable(len(values))
401362
uniques = table.unique(values)

pandas/tests/test_algos.py

-35
Original file line number | Diff line number | Diff line change
@@ -31,41 +31,6 @@
3131
from pandas.util.testing import assert_almost_equal
3232

3333

34-
class TestMatch:
35-
def test_ints(self):
36-
values = np.array([0, 2, 1])
37-
to_match = np.array([0, 1, 2, 2, 0, 1, 3, 0])
38-
39-
result = algos.match(to_match, values)
40-
expected = np.array([0, 2, 1, 1, 0, 2, -1, 0], dtype=np.int64)
41-
tm.assert_numpy_array_equal(result, expected)
42-
43-
result = Series(algos.match(to_match, values, np.nan))
44-
expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0]))
45-
tm.assert_series_equal(result, expected)
46-
47-
s = Series(np.arange(5), dtype=np.float32)
48-
result = algos.match(s, [2, 4])
49-
expected = np.array([-1, -1, 0, -1, 1], dtype=np.int64)
50-
tm.assert_numpy_array_equal(result, expected)
51-
52-
result = Series(algos.match(s, [2, 4], np.nan))
53-
expected = Series(np.array([np.nan, np.nan, 0, np.nan, 1]))
54-
tm.assert_series_equal(result, expected)
55-
56-
def test_strings(self):
57-
values = ["foo", "bar", "baz"]
58-
to_match = ["bar", "foo", "qux", "foo", "bar", "baz", "qux"]
59-
60-
result = algos.match(to_match, values)
61-
expected = np.array([1, 0, -1, 0, 1, 2, -1], dtype=np.int64)
62-
tm.assert_numpy_array_equal(result, expected)
63-
64-
result = Series(algos.match(to_match, values, np.nan))
65-
expected = Series(np.array([1, 0, np.nan, 0, 1, 2, np.nan]))
66-
tm.assert_series_equal(result, expected)
67-
68-
6934
class TestFactorize:
7035
def test_basic(self):
7136

0 commit comments

Comments
 (0)