Skip to content

Commit b766a74

Browse files
committed
WIP: searchsorted implementation
1 parent 02161ff commit b766a74

File tree

6 files changed

+104
-1
lines changed

6 files changed

+104
-1
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,7 @@ Reshaping, sorting
415415
Series.sortlevel
416416
Series.swaplevel
417417
Series.unstack
418+
Series.searchsorted
418419

419420
Combining / joining / merging
420421
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/basics.rst

+13
Original file line numberDiff line numberDiff line change
@@ -1230,6 +1230,19 @@ argument:
12301230
compatibility with NumPy methods which expect the ``ndarray.sort``
12311231
behavior. ``Series.order`` returns a copy of the sorted data.
12321232

1233+
Series has the ``searchsorted`` method, which works similarly to
1234+
``np.ndarray.searchsorted``.
1235+
1236+
.. ipython:: python
1237+
1238+
ser = Series([1, 2, 3])
1239+
ser.searchsorted([0, 3])
1240+
ser.searchsorted([0, 4])
1241+
ser.searchsorted([1, 3], side='right')
1242+
ser.searchsorted([1, 3], side='left')
1243+
ser = Series([3, 1, 2])
1244+
ser.searchsorted([0, 3], sorter=np.argsort(ser))
1245+
12331246
.. _basics.nsorted:
12341247

12351248
smallest / largest values

doc/source/v0.15.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,8 @@ Enhancements
821821

822822
- ``DataFrame.fillna`` can now accept a ``DataFrame`` as a fill value (:issue:`8377`)
823823

824+
- Added ``searchsorted`` method to ``Series`` objects (:issue:`7447`)
825+
824826
.. _whatsnew_0150.performance:
825827

826828
Performance

pandas/core/series.py

+56
Original file line numberDiff line numberDiff line change
@@ -1373,6 +1373,62 @@ def dot(self, other):
13731373
else: # pragma: no cover
13741374
raise TypeError('unsupported type: %s' % type(other))
13751375

1376+
def searchsorted(self, v, side='left', sorter=None):
1377+
"""Find indices where elements should be inserted to maintain order.
1378+
1379+
Find the indices into a sorted Series `self` such that, if the
1380+
corresponding elements in `v` were inserted before the indices, the
1381+
order of `self` would be preserved.
1382+
1383+
Parameters
1384+
----------
1385+
v : array_like
1386+
Values to insert into `self`.
1387+
side : {'left', 'right'}, optional
1388+
If 'left', the index of the first suitable location found is given.
1389+
If 'right', return the last such index. If there is no suitable
1390+
index, return either 0 or N (where N is the length of `self`).
1391+
sorter : 1-D array_like, optional
1392+
Optional array of integer indices that sort `self` into ascending
1393+
order. They are typically the result of ``np.argsort``.
1394+
1395+
Returns
1396+
-------
1397+
indices : array of ints
1398+
Array of insertion points with the same shape as `v`.
1399+
1400+
See Also
1401+
--------
1402+
Series.sort
1403+
Series.order
1404+
numpy.searchsorted
1405+
1406+
Notes
1407+
-----
1408+
Binary search is used to find the required insertion points.
1409+
1410+
Examples
1411+
--------
1412+
>>> x = pd.Series([1, 2, 3])
1413+
>>> x
1414+
0 1
1415+
1 2
1416+
2 3
1417+
dtype: int64
1418+
>>> x.searchsorted(4)
1419+
array([3])
1420+
>>> x.searchsorted([0, 4])
1421+
array([0, 3])
1422+
>>> x.searchsorted([1, 3], side='left')
1423+
array([0, 2])
1424+
>>> x.searchsorted([1, 3], side='right')
1425+
array([1, 3])
1426+
>>> x.searchsorted([1, 2], side='right', sorter=[0, 1, 2])
1427+
array([1, 2])
1428+
"""
1429+
return self.values.searchsorted(Series(v).values, side=side,
1430+
sorter=sorter)
1431+
13761432
#------------------------------------------------------------------------------
13771433
# Combination
13781434

pandas/tests/test_series.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -5956,7 +5956,6 @@ def test_replace_with_single_list(self):
59565956
s.replace([1,2,3],inplace=True,method='crash_cymbal')
59575957
assert_series_equal(s, ser)
59585958

5959-
59605959
def test_replace_mixed_types(self):
59615960
s = Series(np.arange(5),dtype='int64')
59625961

@@ -6164,6 +6163,37 @@ def test_concat_empty_series_dtypes(self):
61646163
self.assertEqual(pd.concat([Series(dtype=np.bool_),
61656164
Series(dtype=np.int32)]).dtype, np.int32)
61666165

6166+
def test_searchsorted_numeric_dtypes_scalar(self):
6167+
s = Series([1, 2, 90, 1000, 3e9])
6168+
r = s.searchsorted(30)
6169+
e = 2
6170+
tm.assert_equal(r, e)
6171+
6172+
r = s.searchsorted([30])
6173+
e = np.array([2])
6174+
tm.assert_array_equal(r, e)
6175+
6176+
def test_searchsorted_numeric_dtypes_vector(self):
6177+
s = Series([1, 2, 90, 1000, 3e9])
6178+
r = s.searchsorted([91, 2e6])
6179+
e = np.array([3, 4])
6180+
tm.assert_array_equal(r, e)
6181+
6182+
def test_search_sorted_datetime64_scalar(self):
6183+
s = Series(pd.date_range('20120101', periods=10, freq='2D'))
6184+
v = pd.Timestamp('20120102')
6185+
r = s.searchsorted(v)
6186+
e = 1
6187+
tm.assert_equal(r, e)
6188+
6189+
def test_search_sorted_datetime64_list(self):
6190+
s = Series(pd.date_range('20120101', periods=10, freq='2D'))
6191+
v = [pd.Timestamp('20120102'), pd.Timestamp('20120104')]
6192+
r = s.searchsorted(v)
6193+
e = np.array([1, 2])
6194+
tm.assert_array_equal(r, e)
6195+
6196+
61676197

61686198

61696199
class TestSeriesNonUnique(tm.TestCase):

pandas/util/testing.py

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
from numpy.random import randn, rand
2323
import numpy as np
24+
from numpy.testing import assert_array_equal
2425

2526
import pandas as pd
2627
from pandas.core.common import _is_sequence, array_equivalent

0 commit comments

Comments
 (0)