Skip to content

Commit 8c58817

Browse files
topper-123 authored and jreback committed
API: Make Series.searchsorted return a scalar, when supplied a scalar (#23801)
1 parent 0bb3772 commit 8c58817

File tree

6 files changed

+36
-14
lines changed

6 files changed

+36
-14
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1098,6 +1098,7 @@ Other API Changes
10981098
has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
10991099
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
11001100
- :class:`DateOffset` attribute ``_cacheable`` and method ``_should_cache`` have been removed (:issue:`23118`)
1101+
- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`).
11011102
- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
11021103
- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`).
11031104
- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).

pandas/core/base.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -1387,8 +1387,14 @@ def factorize(self, sort=False, na_sentinel=-1):
13871387
13881388
Returns
13891389
-------
1390-
indices : array of ints
1391-
Array of insertion points with the same shape as `value`.
1390+
int or array of int
1391+
A scalar or array of insertion points with the
1392+
same shape as `value`.
1393+
1394+
.. versionchanged :: 0.24.0
1395+
If `value` is a scalar, an int is now always returned.
1396+
Previously, scalar inputs returned a 1-item array for
1397+
:class:`Series` and :class:`Categorical`.
13921398
13931399
See Also
13941400
--------
@@ -1409,7 +1415,7 @@ def factorize(self, sort=False, na_sentinel=-1):
14091415
dtype: int64
14101416
14111417
>>> x.searchsorted(4)
1412-
array([3])
1418+
3
14131419
14141420
>>> x.searchsorted([0, 4])
14151421
array([0, 3])
@@ -1426,7 +1432,7 @@ def factorize(self, sort=False, na_sentinel=-1):
14261432
Categories (4, object): [apple < bread < cheese < milk]
14271433
14281434
>>> x.searchsorted('bread')
1429-
array([1]) # Note: an array, not a scalar
1435+
1
14301436
14311437
>>> x.searchsorted(['bread'], side='right')
14321438
array([3])

pandas/core/series.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -2215,8 +2215,10 @@ def __rmatmul__(self, other):
22152215
def searchsorted(self, value, side='left', sorter=None):
22162216
if sorter is not None:
22172217
sorter = ensure_platform_int(sorter)
2218-
return self._values.searchsorted(Series(value)._values,
2219-
side=side, sorter=sorter)
2218+
result = self._values.searchsorted(Series(value)._values,
2219+
side=side, sorter=sorter)
2220+
2221+
return result[0] if is_scalar(value) else result
22202222

22212223
# -------------------------------------------------------------------
22222224
# Combination

pandas/tests/arrays/categorical/test_analytics.py

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas.compat import PYPY
99

1010
from pandas import Categorical, Index, Series
11+
from pandas.api.types import is_scalar
1112
import pandas.util.testing as tm
1213

1314

@@ -86,9 +87,11 @@ def test_searchsorted(self):
8687
# Searching for single item argument, side='left' (default)
8788
res_cat = c1.searchsorted('apple')
8889
assert res_cat == 2
90+
assert is_scalar(res_cat)
8991

9092
res_ser = s1.searchsorted('apple')
9193
assert res_ser == 2
94+
assert is_scalar(res_ser)
9295

9396
# Searching for single item array, side='left' (default)
9497
res_cat = c1.searchsorted(['bread'])

pandas/tests/indexes/multi/test_monotonic.py

+7
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import pandas as pd
77
from pandas import Index, IntervalIndex, MultiIndex
8+
from pandas.api.types import is_scalar
89

910

1011
def test_is_monotonic_increasing():
@@ -182,22 +183,28 @@ def test_searchsorted_monotonic(indices):
182183
# test searchsorted only for increasing
183184
if indices.is_monotonic_increasing:
184185
ssm_left = indices._searchsorted_monotonic(value, side='left')
186+
assert is_scalar(ssm_left)
185187
assert expected_left == ssm_left
186188

187189
ssm_right = indices._searchsorted_monotonic(value, side='right')
190+
assert is_scalar(ssm_right)
188191
assert expected_right == ssm_right
189192

190193
ss_left = indices.searchsorted(value, side='left')
194+
assert is_scalar(ss_left)
191195
assert expected_left == ss_left
192196

193197
ss_right = indices.searchsorted(value, side='right')
198+
assert is_scalar(ss_right)
194199
assert expected_right == ss_right
195200

196201
elif indices.is_monotonic_decreasing:
197202
ssm_left = indices._searchsorted_monotonic(value, side='left')
203+
assert is_scalar(ssm_left)
198204
assert expected_left == ssm_left
199205

200206
ssm_right = indices._searchsorted_monotonic(value, side='right')
207+
assert is_scalar(ssm_right)
201208
assert expected_right == ssm_right
202209

203210
else:

pandas/tests/series/test_analytics.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from pandas import (
1717
Categorical, CategoricalIndex, DataFrame, Series, bdate_range, compat,
1818
date_range, isna, notna)
19+
from pandas.api.types import is_scalar
1920
from pandas.core.index import MultiIndex
2021
from pandas.core.indexes.datetimes import Timestamp
2122
from pandas.core.indexes.timedeltas import Timedelta
@@ -1364,17 +1365,19 @@ def test_numpy_repeat(self):
13641365
def test_searchsorted(self):
13651366
s = Series([1, 2, 3])
13661367

1367-
idx = s.searchsorted(1, side='left')
1368-
tm.assert_numpy_array_equal(idx, np.array([0], dtype=np.intp))
1368+
result = s.searchsorted(1, side='left')
1369+
assert is_scalar(result)
1370+
assert result == 0
13691371

1370-
idx = s.searchsorted(1, side='right')
1371-
tm.assert_numpy_array_equal(idx, np.array([1], dtype=np.intp))
1372+
result = s.searchsorted(1, side='right')
1373+
assert is_scalar(result)
1374+
assert result == 1
13721375

13731376
def test_searchsorted_numeric_dtypes_scalar(self):
13741377
s = Series([1, 2, 90, 1000, 3e9])
13751378
r = s.searchsorted(30)
1376-
e = 2
1377-
assert r == e
1379+
assert is_scalar(r)
1380+
assert r == 2
13781381

13791382
r = s.searchsorted([30])
13801383
e = np.array([2], dtype=np.intp)
@@ -1390,8 +1393,8 @@ def test_search_sorted_datetime64_scalar(self):
13901393
s = Series(pd.date_range('20120101', periods=10, freq='2D'))
13911394
v = pd.Timestamp('20120102')
13921395
r = s.searchsorted(v)
1393-
e = 1
1394-
assert r == e
1396+
assert is_scalar(r)
1397+
assert r == 1
13951398

13961399
def test_search_sorted_datetime64_list(self):
13971400
s = Series(pd.date_range('20120101', periods=10, freq='2D'))

0 commit comments

Comments
 (0)