Skip to content

Commit 1e6df5e

Browse files
gfyounggfyoung
gfyoung
authored and committed
BUG: Matched searchsorted signature with numpy's
Closes gh-12238.
1 parent 0f3a7b8 commit 1e6df5e

File tree

8 files changed

+110
-127
lines changed

8 files changed

+110
-127
lines changed

doc/source/whatsnew/v0.18.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Enhancements
4343

4444
API changes
4545
~~~~~~~~~~~
46+
- ``searchsorted`` for ``Index`` and ``TimedeltaIndex`` now accepts a ``sorter`` argument to maintain compatibility with numpy's ``searchsorted`` function (:issue:`12238`)
4647

4748
- ``Period`` and ``PeriodIndex`` now raise an ``IncompatibleFrequency`` error, which inherits from ``ValueError``, rather than a raw ``ValueError`` (:issue:`12615`)
4849

pandas/core/base.py

+68-7
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from pandas.core import common as com
88
import pandas.core.nanops as nanops
99
import pandas.lib as lib
10-
from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg
10+
from pandas.util.decorators import (Appender, cache_readonly,
11+
deprecate_kwarg, Substitution)
1112
from pandas.core.common import AbstractMethodError
1213

1314
_shared_docs = dict()
@@ -990,13 +991,73 @@ def factorize(self, sort=False, na_sentinel=-1):
990991
from pandas.core.algorithms import factorize
991992
return factorize(self, sort=sort, na_sentinel=na_sentinel)
992993

993-
def searchsorted(self, key, side='left'):
994-
""" np.ndarray searchsorted compat """
994+
_shared_docs['searchsorted'] = (
995+
"""Find indices where elements should be inserted to maintain order.
995996
996-
# FIXME in GH7447
997-
# needs coercion on the key (DatetimeIndex does alreay)
998-
# needs tests/doc-string
999-
return self.values.searchsorted(key, side=side)
997+
Find the indices into a sorted %(klass)s `self` such that, if the
998+
corresponding elements in `v` were inserted before the indices, the
999+
order of `self` would be preserved.
1000+
1001+
Parameters
1002+
----------
1003+
%(value)s : array_like
1004+
Values to insert into `self`.
1005+
side : {'left', 'right'}, optional
1006+
If 'left', the index of the first suitable location found is given.
1007+
If 'right', return the last such index. If there is no suitable
1008+
index, return either 0 or N (where N is the length of `self`).
1009+
sorter : 1-D array_like, optional
1010+
Optional array of integer indices that sort `self` into ascending
1011+
order. They are typically the result of ``np.argsort``.
1012+
1013+
Returns
1014+
-------
1015+
indices : array of ints
1016+
Array of insertion points with the same shape as `v`.
1017+
1018+
See Also
1019+
--------
1020+
numpy.searchsorted
1021+
1022+
Notes
1023+
-----
1024+
Binary search is used to find the required insertion points.
1025+
1026+
Examples
1027+
--------
1028+
>>> x = pd.Series([1, 2, 3])
1029+
>>> x
1030+
0 1
1031+
1 2
1032+
2 3
1033+
dtype: int64
1034+
>>> x.searchsorted(4)
1035+
array([3])
1036+
>>> x.searchsorted([0, 4])
1037+
array([0, 3])
1038+
>>> x.searchsorted([1, 3], side='left')
1039+
array([0, 2])
1040+
>>> x.searchsorted([1, 3], side='right')
1041+
array([1, 3])
1042+
>>>
1043+
>>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk' ])
1044+
[apple, bread, bread, cheese, milk]
1045+
Categories (4, object): [apple < bread < cheese < milk]
1046+
>>> x.searchsorted('bread')
1047+
array([1]) # Note: an array, not a scalar
1048+
>>> x.searchsorted(['bread'])
1049+
array([1])
1050+
>>> x.searchsorted(['bread', 'eggs'])
1051+
array([1, 4])
1052+
>>> x.searchsorted(['bread', 'eggs'], side='right')
1053+
array([3, 4]) # eggs before milk
1054+
""")
1055+
1056+
@Substitution(klass='IndexOpsMixin', value='key')
1057+
@Appender(_shared_docs['searchsorted'])
1058+
def searchsorted(self, key, side='left', sorter=None):
1059+
# needs coercion on the key (DatetimeIndex does already)
1060+
return self.values.searchsorted(key, side=side, sorter=sorter)
10001061

10011062
_shared_docs['drop_duplicates'] = (
10021063
"""Return %(klass)s with duplicate values removed

pandas/core/categorical.py

+8-55
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
from pandas.compat import u
99

1010
from pandas.core.algorithms import factorize
11-
from pandas.core.base import PandasObject, PandasDelegate, NoNewAttributesMixin
11+
from pandas.core.base import (PandasObject, PandasDelegate,
12+
NoNewAttributesMixin, _shared_docs)
1213
import pandas.core.common as com
1314
from pandas.core.missing import interpolate_2d
14-
from pandas.util.decorators import cache_readonly, deprecate_kwarg
15+
from pandas.util.decorators import (Appender, cache_readonly,
16+
deprecate_kwarg, Substitution)
1517

1618
from pandas.core.common import (
1719
ABCSeries, ABCIndexClass, ABCCategoricalIndex, isnull, notnull,
@@ -1003,67 +1005,18 @@ def memory_usage(self, deep=False):
10031005
"""
10041006
return self._codes.nbytes + self._categories.memory_usage(deep=deep)
10051007

1008+
@Substitution(klass='Categorical', value='v')
1009+
@Appender(_shared_docs['searchsorted'])
10061010
def searchsorted(self, v, side='left', sorter=None):
1007-
"""Find indices where elements should be inserted to maintain order.
1008-
1009-
Find the indices into a sorted Categorical `self` such that, if the
1010-
corresponding elements in `v` were inserted before the indices, the
1011-
order of `self` would be preserved.
1012-
1013-
Parameters
1014-
----------
1015-
v : array_like
1016-
Array-like values or a scalar value, to insert/search for in
1017-
`self`.
1018-
side : {'left', 'right'}, optional
1019-
If 'left', the index of the first suitable location found is given.
1020-
If 'right', return the last such index. If there is no suitable
1021-
index, return either 0 or N (where N is the length of `a`).
1022-
sorter : 1-D array_like, optional
1023-
Optional array of integer indices that sort `self` into ascending
1024-
order. They are typically the result of ``np.argsort``.
1025-
1026-
Returns
1027-
-------
1028-
indices : array of ints
1029-
Array of insertion points with the same shape as `v`.
1030-
1031-
See Also
1032-
--------
1033-
Series.searchsorted
1034-
numpy.searchsorted
1035-
1036-
Notes
1037-
-----
1038-
Binary search is used to find the required insertion points.
1039-
1040-
Examples
1041-
--------
1042-
>>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk' ])
1043-
[apple, bread, bread, cheese, milk]
1044-
Categories (4, object): [apple < bread < cheese < milk]
1045-
>>> x.searchsorted('bread')
1046-
array([1]) # Note: an array, not a scalar
1047-
>>> x.searchsorted(['bread'])
1048-
array([1])
1049-
>>> x.searchsorted(['bread', 'eggs'])
1050-
array([1, 4])
1051-
>>> x.searchsorted(['bread', 'eggs'], side='right')
1052-
array([3, 4]) # eggs before milk
1053-
>>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk',
1054-
'donuts' ])
1055-
>>> x.searchsorted(['bread', 'eggs'], side='right',
1056-
sorter=[0, 1, 2, 3, 5, 4])
1057-
array([3, 5]) # eggs after donuts, after switching milk and donuts
1058-
"""
10591011
if not self.ordered:
10601012
raise ValueError("Categorical not ordered\nyou can use "
10611013
".as_ordered() to change the Categorical to an "
10621014
"ordered one")
10631015

10641016
from pandas.core.series import Series
10651017
values_as_codes = self.categories.values.searchsorted(
1066-
Series(v).values, side)
1018+
Series(v).values, side=side)
1019+
10671020
return self.codes.searchsorted(values_as_codes, sorter=sorter)
10681021

10691022
def isnull(self):

pandas/core/series.py

+5-57
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
import pandas.core.datetools as datetools
5050
import pandas.core.format as fmt
5151
import pandas.core.nanops as nanops
52-
from pandas.util.decorators import Appender, deprecate_kwarg
52+
from pandas.util.decorators import Appender, deprecate_kwarg, Substitution
5353

5454
import pandas.lib as lib
5555
import pandas.tslib as tslib
@@ -1464,63 +1464,11 @@ def dot(self, other):
14641464
else: # pragma: no cover
14651465
raise TypeError('unsupported type: %s' % type(other))
14661466

1467+
@Substitution(klass='Series', value='v')
1468+
@Appender(base._shared_docs['searchsorted'])
14671469
def searchsorted(self, v, side='left', sorter=None):
1468-
"""Find indices where elements should be inserted to maintain order.
1469-
1470-
Find the indices into a sorted Series `self` such that, if the
1471-
corresponding elements in `v` were inserted before the indices, the
1472-
order of `self` would be preserved.
1473-
1474-
Parameters
1475-
----------
1476-
v : array_like
1477-
Values to insert into `a`.
1478-
side : {'left', 'right'}, optional
1479-
If 'left', the index of the first suitable location found is given.
1480-
If 'right', return the last such index. If there is no suitable
1481-
index, return either 0 or N (where N is the length of `a`).
1482-
sorter : 1-D array_like, optional
1483-
Optional array of integer indices that sort `self` into ascending
1484-
order. They are typically the result of ``np.argsort``.
1485-
1486-
Returns
1487-
-------
1488-
indices : array of ints
1489-
Array of insertion points with the same shape as `v`.
1490-
1491-
See Also
1492-
--------
1493-
Series.sort_values
1494-
numpy.searchsorted
1495-
1496-
Notes
1497-
-----
1498-
Binary search is used to find the required insertion points.
1499-
1500-
Examples
1501-
--------
1502-
>>> x = pd.Series([1, 2, 3])
1503-
>>> x
1504-
0 1
1505-
1 2
1506-
2 3
1507-
dtype: int64
1508-
>>> x.searchsorted(4)
1509-
array([3])
1510-
>>> x.searchsorted([0, 4])
1511-
array([0, 3])
1512-
>>> x.searchsorted([1, 3], side='left')
1513-
array([0, 2])
1514-
>>> x.searchsorted([1, 3], side='right')
1515-
array([1, 3])
1516-
>>> x.searchsorted([1, 2], side='right', sorter=[0, 2, 1])
1517-
array([1, 3])
1518-
"""
1519-
if sorter is not None:
1520-
sorter = com._ensure_platform_int(sorter)
1521-
1522-
return self._values.searchsorted(Series(v)._values, side=side,
1523-
sorter=sorter)
1470+
return self._values.searchsorted(Series(v)._values,
1471+
side=side, sorter=sorter)
15241472

15251473
# -------------------------------------------------------------------
15261474
# Combination

pandas/tests/test_base.py

+9
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,15 @@ def test_memory_usage(self):
972972
diff = res_deep - sys.getsizeof(o)
973973
self.assertTrue(abs(diff) < 100)
974974

975+
def test_searchsorted(self):
976+
# See gh-12238
977+
for o in self.objs:
978+
index = np.searchsorted(o, max(o))
979+
self.assertTrue(0 <= index <= len(o))
980+
981+
index = np.searchsorted(o, max(o), sorter=range(len(o)))
982+
self.assertTrue(0 <= index <= len(o))
983+
975984

976985
class TestFloat64HashTable(tm.TestCase):
977986

pandas/tseries/index.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from datetime import time, datetime
66
from datetime import timedelta
77
import numpy as np
8+
from pandas.core.base import _shared_docs
89
from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE,
910
_values_from_object, _maybe_box,
1011
is_object_dtype, is_datetime64_dtype,
@@ -22,7 +23,8 @@
2223
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
2324
from pandas.tseries.tools import parse_time_string, normalize_date, to_time
2425
from pandas.tseries.timedeltas import to_timedelta
25-
from pandas.util.decorators import cache_readonly, deprecate_kwarg
26+
from pandas.util.decorators import (Appender, cache_readonly,
27+
deprecate_kwarg, Substitution)
2628
import pandas.core.common as com
2729
import pandas.tseries.offsets as offsets
2830
import pandas.tseries.tools as tools
@@ -1629,7 +1631,9 @@ def normalize(self):
16291631
return DatetimeIndex(new_values, freq='infer', name=self.name,
16301632
tz=self.tz)
16311633

1632-
def searchsorted(self, key, side='left'):
1634+
@Substitution(klass='DatetimeIndex', value='key')
1635+
@Appender(_shared_docs['searchsorted'])
1636+
def searchsorted(self, key, side='left', sorter=None):
16331637
if isinstance(key, (np.ndarray, Index)):
16341638
key = np.array(key, dtype=_NS_DTYPE, copy=False)
16351639
else:

pandas/tseries/period.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,14 @@
1313
get_period_field_arr, _validate_end_alias,
1414
_quarter_to_myear)
1515

16+
from pandas.core.base import _shared_docs
17+
1618
import pandas.core.common as com
1719
from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box,
1820
_values_from_object, ABCSeries,
1921
is_integer, is_float, is_object_dtype)
2022
from pandas import compat
21-
from pandas.util.decorators import cache_readonly
22-
23+
from pandas.util.decorators import Appender, cache_readonly, Substitution
2324
from pandas.lib import Timedelta
2425
import pandas.lib as lib
2526
import pandas.tslib as tslib
@@ -385,7 +386,9 @@ def astype(self, dtype):
385386
return Index(self.values, dtype)
386387
raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
387388

388-
def searchsorted(self, key, side='left'):
389+
@Substitution(klass='PeriodIndex', value='key')
390+
@Appender(_shared_docs['searchsorted'])
391+
def searchsorted(self, key, side='left', sorter=None):
389392
if isinstance(key, Period):
390393
if key.freq != self.freq:
391394
msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, key.freqstr)
@@ -394,7 +397,7 @@ def searchsorted(self, key, side='left'):
394397
elif isinstance(key, compat.string_types):
395398
key = Period(key, freq=self.freq).ordinal
396399

397-
return self.values.searchsorted(key, side=side)
400+
return self.values.searchsorted(key, side=side, sorter=sorter)
398401

399402
@property
400403
def is_all_dates(self):

pandas/tseries/tdi.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
import pandas.compat as compat
1111
from pandas.compat import u
1212
from pandas.tseries.frequencies import to_offset
13+
from pandas.core.base import _shared_docs
1314
import pandas.core.common as com
15+
from pandas.util.decorators import Appender, Substitution
1416
from pandas.tseries.base import TimelikeOps, DatetimeIndexOpsMixin
1517
from pandas.tseries.timedeltas import (to_timedelta,
1618
_coerce_scalar_to_timedelta_type)
@@ -786,13 +788,15 @@ def _partial_td_slice(self, key, freq, use_lhs=True, use_rhs=True):
786788
# # try to find the dates
787789
# return (lhs_mask & rhs_mask).nonzero()[0]
788790

789-
def searchsorted(self, key, side='left'):
791+
@Substitution(klass='TimedeltaIndex', value='key')
792+
@Appender(_shared_docs['searchsorted'])
793+
def searchsorted(self, key, side='left', sorter=None):
790794
if isinstance(key, (np.ndarray, Index)):
791795
key = np.array(key, dtype=_TD_DTYPE, copy=False)
792796
else:
793797
key = _to_m8(key)
794798

795-
return self.values.searchsorted(key, side=side)
799+
return self.values.searchsorted(key, side=side, sorter=sorter)
796800

797801
def is_type_compatible(self, typ):
798802
return typ == self.inferred_type or typ == 'timedelta'

0 commit comments

Comments
 (0)