BUG: Matched searchsorted signature with numpy's

gfyoung · gfyoung · commit 1e6df5ec097d · 2016-03-17T19:28:11.000Z
Closes gh-12238.
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -43,6 +43,7 @@ Enhancements
 
 API changes
 ~~~~~~~~~~~
+- ``searchsorted`` for ``Index`` and ``TimedeltaIndex`` now accepts a ``sorter`` argument to maintain compatibility with numpy's ``searchsorted`` function (:issue:`12238`)
 
 - ``Period`` and ``PeriodIndex`` now raises ``IncompatibleFrequency`` error which inherits ``ValueError`` rather than raw ``ValueError`` (:issue:`12615`)
 
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -7,7 +7,8 @@
 from pandas.core import common as com
 import pandas.core.nanops as nanops
 import pandas.lib as lib
-from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg
+from pandas.util.decorators import (Appender, cache_readonly,
+                                    deprecate_kwarg, Substitution)
 from pandas.core.common import AbstractMethodError
 
 _shared_docs = dict()
@@ -990,13 +991,73 @@ def factorize(self, sort=False, na_sentinel=-1):
         from pandas.core.algorithms import factorize
         return factorize(self, sort=sort, na_sentinel=na_sentinel)
 
-    def searchsorted(self, key, side='left'):
-        """ np.ndarray searchsorted compat """
+    _shared_docs['searchsorted'] = (
+        """Find indices where elements should be inserted to maintain order.
 
-        # FIXME in GH7447
-        # needs coercion on the key (DatetimeIndex does alreay)
-        # needs tests/doc-string
-        return self.values.searchsorted(key, side=side)
+        Find the indices into a sorted %(klass)s `self` such that, if the
+        corresponding elements in `%(value)s` were inserted before the
+        indices, the order of `self` would be preserved.
+
+        Parameters
+        ----------
+        %(value)s : array_like
+            Values to insert into `self`.
+        side : {'left', 'right'}, optional
+            If 'left', the index of the first suitable location found is given.
+            If 'right', return the last such index.  If there is no suitable
+            index, return either 0 or N (where N is the length of `self`).
+        sorter : 1-D array_like, optional
+            Optional array of integer indices that sort `self` into ascending
+            order. They are typically the result of ``np.argsort``.
+
+        Returns
+        -------
+        indices : array of ints
+            Array of insertion points with the same shape as `%(value)s`.
+
+        See Also
+        --------
+        numpy.searchsorted
+
+        Notes
+        -----
+        Binary search is used to find the required insertion points.
+
+        Examples
+        --------
+        >>> x = pd.Series([1, 2, 3])
+        >>> x
+        0    1
+        1    2
+        2    3
+        dtype: int64
+        >>> x.searchsorted(4)
+        array([3])
+        >>> x.searchsorted([0, 4])
+        array([0, 3])
+        >>> x.searchsorted([1, 3], side='left')
+        array([0, 2])
+        >>> x.searchsorted([1, 3], side='right')
+        array([1, 3])
+        >>>
+        >>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk' ])
+        [apple, bread, bread, cheese, milk]
+        Categories (4, object): [apple < bread < cheese < milk]
+        >>> x.searchsorted('bread')
+        array([1])     # Note: an array, not a scalar
+        >>> x.searchsorted(['bread'])
+        array([1])
+        >>> x.searchsorted(['bread', 'eggs'])
+        array([1, 4])
+        >>> x.searchsorted(['bread', 'eggs'], side='right')
+        array([3, 4])    # eggs before milk
+        """)
+
+    @Substitution(klass='IndexOpsMixin', value='key')
+    @Appender(_shared_docs['searchsorted'])
+    def searchsorted(self, key, side='left', sorter=None):
+        # needs coercion on the key (DatetimeIndex does already)
+        return self.values.searchsorted(key, side=side, sorter=sorter)
 
     _shared_docs['drop_duplicates'] = (
         """Return %(klass)s with duplicate values removed
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -8,10 +8,12 @@
 from pandas.compat import u
 
 from pandas.core.algorithms import factorize
-from pandas.core.base import PandasObject, PandasDelegate, NoNewAttributesMixin
+from pandas.core.base import (PandasObject, PandasDelegate,
+                              NoNewAttributesMixin, _shared_docs)
 import pandas.core.common as com
 from pandas.core.missing import interpolate_2d
-from pandas.util.decorators import cache_readonly, deprecate_kwarg
+from pandas.util.decorators import (Appender, cache_readonly,
+                                    deprecate_kwarg, Substitution)
 
 from pandas.core.common import (
     ABCSeries, ABCIndexClass, ABCCategoricalIndex, isnull, notnull,
@@ -1003,67 +1005,18 @@ def memory_usage(self, deep=False):
         """
         return self._codes.nbytes + self._categories.memory_usage(deep=deep)
 
+    @Substitution(klass='Categorical', value='v')
+    @Appender(_shared_docs['searchsorted'])
     def searchsorted(self, v, side='left', sorter=None):
-        """Find indices where elements should be inserted to maintain order.
-
-        Find the indices into a sorted Categorical `self` such that, if the
-        corresponding elements in `v` were inserted before the indices, the
-        order of `self` would be preserved.
-
-        Parameters
-        ----------
-        v : array_like
-            Array-like values or a scalar value, to insert/search for in
-            `self`.
-        side : {'left', 'right'}, optional
-            If 'left', the index of the first suitable location found is given.
-            If 'right', return the last such index.  If there is no suitable
-            index, return either 0 or N (where N is the length of `a`).
-        sorter : 1-D array_like, optional
-            Optional array of integer indices that sort `self` into ascending
-            order. They are typically the result of ``np.argsort``.
-
-        Returns
-        -------
-        indices : array of ints
-            Array of insertion points with the same shape as `v`.
-
-        See Also
-        --------
-        Series.searchsorted
-        numpy.searchsorted
-
-        Notes
-        -----
-        Binary search is used to find the required insertion points.
-
-        Examples
-        --------
-        >>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk' ])
-        [apple, bread, bread, cheese, milk]
-        Categories (4, object): [apple < bread < cheese < milk]
-        >>> x.searchsorted('bread')
-        array([1])     # Note: an array, not a scalar
-        >>> x.searchsorted(['bread'])
-        array([1])
-        >>> x.searchsorted(['bread', 'eggs'])
-        array([1, 4])
-        >>> x.searchsorted(['bread', 'eggs'], side='right')
-        array([3, 4])	    # eggs before milk
-        >>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk',
-                                'donuts' ])
-        >>> x.searchsorted(['bread', 'eggs'], side='right',
-                           sorter=[0, 1, 2, 3, 5, 4])
-        array([3, 5]) # eggs after donuts, after switching milk and donuts
-        """
         if not self.ordered:
             raise ValueError("Categorical not ordered\nyou can use "
                              ".as_ordered() to change the Categorical to an "
                              "ordered one")
 
         from pandas.core.series import Series
         values_as_codes = self.categories.values.searchsorted(
-            Series(v).values, side)
+            Series(v).values, side=side)
+
         return self.codes.searchsorted(values_as_codes, sorter=sorter)
 
     def isnull(self):
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -49,7 +49,7 @@
 import pandas.core.datetools as datetools
 import pandas.core.format as fmt
 import pandas.core.nanops as nanops
-from pandas.util.decorators import Appender, deprecate_kwarg
+from pandas.util.decorators import Appender, deprecate_kwarg, Substitution
 
 import pandas.lib as lib
 import pandas.tslib as tslib
@@ -1464,63 +1464,11 @@ def dot(self, other):
         else:  # pragma: no cover
             raise TypeError('unsupported type: %s' % type(other))
 
+    @Substitution(klass='Series', value='v')
+    @Appender(base._shared_docs['searchsorted'])
     def searchsorted(self, v, side='left', sorter=None):
-        """Find indices where elements should be inserted to maintain order.
-
-        Find the indices into a sorted Series `self` such that, if the
-        corresponding elements in `v` were inserted before the indices, the
-        order of `self` would be preserved.
-
-        Parameters
-        ----------
-        v : array_like
-            Values to insert into `a`.
-        side : {'left', 'right'}, optional
-            If 'left', the index of the first suitable location found is given.
-            If 'right', return the last such index.  If there is no suitable
-            index, return either 0 or N (where N is the length of `a`).
-        sorter : 1-D array_like, optional
-            Optional array of integer indices that sort `self` into ascending
-            order. They are typically the result of ``np.argsort``.
-
-        Returns
-        -------
-        indices : array of ints
-            Array of insertion points with the same shape as `v`.
-
-        See Also
-        --------
-        Series.sort_values
-        numpy.searchsorted
-
-        Notes
-        -----
-        Binary search is used to find the required insertion points.
-
-        Examples
-        --------
-        >>> x = pd.Series([1, 2, 3])
-        >>> x
-        0    1
-        1    2
-        2    3
-        dtype: int64
-        >>> x.searchsorted(4)
-        array([3])
-        >>> x.searchsorted([0, 4])
-        array([0, 3])
-        >>> x.searchsorted([1, 3], side='left')
-        array([0, 2])
-        >>> x.searchsorted([1, 3], side='right')
-        array([1, 3])
-        >>> x.searchsorted([1, 2], side='right', sorter=[0, 2, 1])
-        array([1, 3])
-        """
-        if sorter is not None:
-            sorter = com._ensure_platform_int(sorter)
-
-        return self._values.searchsorted(Series(v)._values, side=side,
-                                         sorter=sorter)
+        return self._values.searchsorted(Series(v)._values,
+                                         side=side, sorter=sorter)
 
     # -------------------------------------------------------------------
     # Combination
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
@@ -972,6 +972,15 @@ def test_memory_usage(self):
             diff = res_deep - sys.getsizeof(o)
             self.assertTrue(abs(diff) < 100)
 
+    def test_searchsorted(self):
+        # See gh-12238
+        for o in self.objs:
+            index = np.searchsorted(o, max(o))
+            self.assertTrue(0 <= index <= len(o))
+
+            index = np.searchsorted(o, max(o), sorter=range(len(o)))
+            self.assertTrue(0 <= index <= len(o))
+
 
 class TestFloat64HashTable(tm.TestCase):
 
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
@@ -5,6 +5,7 @@
 from datetime import time, datetime
 from datetime import timedelta
 import numpy as np
+from pandas.core.base import _shared_docs
 from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE,
                                 _values_from_object, _maybe_box,
                                 is_object_dtype, is_datetime64_dtype,
@@ -22,7 +23,8 @@
 from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
 from pandas.tseries.tools import parse_time_string, normalize_date, to_time
 from pandas.tseries.timedeltas import to_timedelta
-from pandas.util.decorators import cache_readonly, deprecate_kwarg
+from pandas.util.decorators import (Appender, cache_readonly,
+                                    deprecate_kwarg, Substitution)
 import pandas.core.common as com
 import pandas.tseries.offsets as offsets
 import pandas.tseries.tools as tools
@@ -1629,7 +1631,9 @@ def normalize(self):
         return DatetimeIndex(new_values, freq='infer', name=self.name,
                              tz=self.tz)
 
-    def searchsorted(self, key, side='left'):
+    @Substitution(klass='DatetimeIndex', value='key')
+    @Appender(_shared_docs['searchsorted'])
+    def searchsorted(self, key, side='left', sorter=None):
         if isinstance(key, (np.ndarray, Index)):
             key = np.array(key, dtype=_NS_DTYPE, copy=False)
         else:
diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
@@ -13,13 +13,14 @@
                             get_period_field_arr, _validate_end_alias,
                             _quarter_to_myear)
 
+from pandas.core.base import _shared_docs
+
 import pandas.core.common as com
 from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box,
                                 _values_from_object, ABCSeries,
                                 is_integer, is_float, is_object_dtype)
 from pandas import compat
-from pandas.util.decorators import cache_readonly
-
+from pandas.util.decorators import Appender, cache_readonly, Substitution
 from pandas.lib import Timedelta
 import pandas.lib as lib
 import pandas.tslib as tslib
@@ -385,7 +386,9 @@ def astype(self, dtype):
             return Index(self.values, dtype)
         raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
 
-    def searchsorted(self, key, side='left'):
+    @Substitution(klass='PeriodIndex', value='key')
+    @Appender(_shared_docs['searchsorted'])
+    def searchsorted(self, key, side='left', sorter=None):
         if isinstance(key, Period):
             if key.freq != self.freq:
                 msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, key.freqstr)
@@ -394,7 +397,7 @@ def searchsorted(self, key, side='left'):
         elif isinstance(key, compat.string_types):
             key = Period(key, freq=self.freq).ordinal
 
-        return self.values.searchsorted(key, side=side)
+        return self.values.searchsorted(key, side=side, sorter=sorter)
 
     @property
     def is_all_dates(self):
diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py
@@ -10,7 +10,9 @@
 import pandas.compat as compat
 from pandas.compat import u
 from pandas.tseries.frequencies import to_offset
+from pandas.core.base import _shared_docs
 import pandas.core.common as com
+from pandas.util.decorators import Appender, Substitution
 from pandas.tseries.base import TimelikeOps, DatetimeIndexOpsMixin
 from pandas.tseries.timedeltas import (to_timedelta,
                                        _coerce_scalar_to_timedelta_type)
@@ -786,13 +788,15 @@ def _partial_td_slice(self, key, freq, use_lhs=True, use_rhs=True):
         # # try to find a the dates
         # return (lhs_mask & rhs_mask).nonzero()[0]
 
-    def searchsorted(self, key, side='left'):
+    @Substitution(klass='TimedeltaIndex', value='key')
+    @Appender(_shared_docs['searchsorted'])
+    def searchsorted(self, key, side='left', sorter=None):
         if isinstance(key, (np.ndarray, Index)):
             key = np.array(key, dtype=_TD_DTYPE, copy=False)
         else:
             key = _to_m8(key)
 
-        return self.values.searchsorted(key, side=side)
+        return self.values.searchsorted(key, side=side, sorter=sorter)
 
     def is_type_compatible(self, typ):
         return typ == self.inferred_type or typ == 'timedelta'