From b766a74fdd07749eeb71672a9cf09674bc881ae1 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 12 Jun 2014 23:02:49 -0400 Subject: [PATCH] WIP: searchsorted implementation --- doc/source/api.rst | 1 + doc/source/basics.rst | 13 +++++++++ doc/source/v0.15.0.txt | 2 ++ pandas/core/series.py | 56 +++++++++++++++++++++++++++++++++++++ pandas/tests/test_series.py | 32 ++++++++++++++++++++- pandas/util/testing.py | 1 + 6 files changed, 104 insertions(+), 1 deletion(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index cce15685035d0..4dd055bce0a0a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -415,6 +415,7 @@ Reshaping, sorting Series.sortlevel Series.swaplevel Series.unstack + Series.searchsorted Combining / joining / merging ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/basics.rst b/doc/source/basics.rst index b32874f5ca7d8..d0094eae16d38 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1230,6 +1230,19 @@ argument: compatibility with NumPy methods which expect the ``ndarray.sort`` behavior. ``Series.order`` returns a copy of the sorted data. +Series has the ``searchsorted`` method, which works similarly to +``np.ndarray.searchsorted``. + +.. ipython:: python + + ser = Series([1, 2, 3]) + ser.searchsorted([0, 3]) + ser.searchsorted([0, 4]) + ser.searchsorted([1, 3], side='right') + ser.searchsorted([1, 3], side='left') + ser = Series([3, 1, 2]) + ser.searchsorted([0, 3], sorter=np.argsort(ser)) + .. _basics.nsorted: smallest / largest values diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 1d9acadb68e58..b7d0f9c8f247f 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -821,6 +821,8 @@ Enhancements - ``DataFrame.fillna`` can now accept a ``DataFrame`` as a fill value (:issue:`8377`) +- Added ``searchsorted`` method to ``Series`` objects (:issue:`7447`) + ..
_whatsnew_0150.performance: Performance diff --git a/pandas/core/series.py b/pandas/core/series.py index 37f66fc56ea56..24cfe9c54b3d9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1373,6 +1373,62 @@ def dot(self, other): else: # pragma: no cover raise TypeError('unsupported type: %s' % type(other)) + def searchsorted(self, v, side='left', sorter=None): + """Find indices where elements should be inserted to maintain order. + + Find the indices into a sorted Series `self` such that, if the + corresponding elements in `v` were inserted before the indices, the + order of `self` would be preserved. + + Parameters + ---------- + v : array_like + Values to insert into `self`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array_like, optional + Optional array of integer indices that sort `self` into ascending + order. They are typically the result of ``np.argsort``. + + Returns + ------- + indices : array of ints + Array of insertion points with the same shape as `v`. + + See Also + -------- + Series.sort + Series.order + numpy.searchsorted + + Notes + ----- + Binary search is used to find the required insertion points. 
+ + Examples + -------- + >>> x = pd.Series([1, 2, 3]) + >>> x + 0 1 + 1 2 + 2 3 + dtype: int64 + >>> x.searchsorted(4) + array([3]) + >>> x.searchsorted([0, 4]) + array([0, 3]) + >>> x.searchsorted([1, 3], side='left') + array([0, 2]) + >>> x.searchsorted([1, 3], side='right') + array([1, 3]) + >>> x.searchsorted([1, 2], side='right', sorter=[0, 2, 1]) + array([1, 3]) + """ + return self.values.searchsorted(Series(v).values, side=side, + sorter=sorter) + #------------------------------------------------------------------------------ # Combination diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 54040ced71e61..a8599bcda8513 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5956,7 +5956,6 @@ def test_replace_with_single_list(self): s.replace([1,2,3],inplace=True,method='crash_cymbal') assert_series_equal(s, ser) - def test_replace_mixed_types(self): s = Series(np.arange(5),dtype='int64') @@ -6164,6 +6163,37 @@ def test_concat_empty_series_dtypes(self): self.assertEqual(pd.concat([Series(dtype=np.bool_), Series(dtype=np.int32)]).dtype, np.int32) + def test_searchsorted_numeric_dtypes_scalar(self): + s = Series([1, 2, 90, 1000, 3e9]) + r = s.searchsorted(30) + e = 2 + tm.assert_equal(r, e) + + r = s.searchsorted([30]) + e = np.array([2]) + tm.assert_array_equal(r, e) + + def test_searchsorted_numeric_dtypes_vector(self): + s = Series([1, 2, 90, 1000, 3e9]) + r = s.searchsorted([91, 2e6]) + e = np.array([3, 4]) + tm.assert_array_equal(r, e) + + def test_search_sorted_datetime64_scalar(self): + s = Series(pd.date_range('20120101', periods=10, freq='2D')) + v = pd.Timestamp('20120102') + r = s.searchsorted(v) + e = 1 + tm.assert_equal(r, e) + + def test_search_sorted_datetime64_list(self): + s = Series(pd.date_range('20120101', periods=10, freq='2D')) + v = [pd.Timestamp('20120102'), pd.Timestamp('20120104')] + r = s.searchsorted(v) + e = np.array([1, 2]) + tm.assert_array_equal(r, e) + + class 
TestSeriesNonUnique(tm.TestCase): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 2b775201d9900..977d445f917a8 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -21,6 +21,7 @@ from numpy.random import randn, rand import numpy as np +from numpy.testing import assert_array_equal import pandas as pd from pandas.core.common import _is_sequence, array_equivalent