Skip to content

Commit 91d2cfd

Browse files
committed
API: Make Series.searchsorted return a scalar, when supplied a scalar
1 parent 949b148 commit 91d2cfd

File tree

6 files changed

+31
-17
lines changed

6 files changed

+31
-17
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1059,6 +1059,7 @@ Other API Changes
10591059
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
10601060
- :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
10611061
- Comparing :class:`Timedelta` to be less or greater than unknown types now raises a ``TypeError`` instead of returning ``False`` (:issue:`20829`)
1062+
- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`xxxxx`).
10621063
- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
10631064
- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`).
10641065
- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).

pandas/core/base.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -1336,8 +1336,8 @@ def factorize(self, sort=False, na_sentinel=-1):
13361336
13371337
Parameters
13381338
----------
1339-
value : array_like
1340-
Values to insert into `self`.
1339+
value : scalar or array_like
1340+
Value(s) to insert into `self`.
13411341
side : {'left', 'right'}, optional
13421342
If 'left', the index of the first suitable location found is given.
13431343
If 'right', return the last such index. If there is no suitable
@@ -1348,8 +1348,14 @@ def factorize(self, sort=False, na_sentinel=-1):
13481348
13491349
Returns
13501350
-------
1351-
indices : array of ints
1352-
Array of insertion points with the same shape as `value`.
1351+
int or array of ints
1352+
A scalar or array of insertion points with the
1353+
same shape as `value`.
1354+
1355+
.. versionchanged :: 0.24.0
1356+
If `value` is a scalar, an int is now always returned.
1357+
Previously, scalar inputs returned a 1-item array for
1358+
:class:`Series` and :class:`Categorical`.
13531359
13541360
See Also
13551361
--------
@@ -1370,7 +1376,7 @@ def factorize(self, sort=False, na_sentinel=-1):
13701376
dtype: int64
13711377
13721378
>>> x.searchsorted(4)
1373-
array([3])
1379+
3
13741380
13751381
>>> x.searchsorted([0, 4])
13761382
array([0, 3])
@@ -1387,7 +1393,7 @@ def factorize(self, sort=False, na_sentinel=-1):
13871393
Categories (4, object): [apple < bread < cheese < milk]
13881394
13891395
>>> x.searchsorted('bread')
1390-
array([1]) # Note: an array, not a scalar
1396+
1
13911397
13921398
>>> x.searchsorted(['bread'], side='right')
13931399
array([3])

pandas/core/series.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -2214,8 +2214,10 @@ def __rmatmul__(self, other):
22142214
def searchsorted(self, value, side='left', sorter=None):
22152215
if sorter is not None:
22162216
sorter = ensure_platform_int(sorter)
2217-
return self._values.searchsorted(Series(value)._values,
2218-
side=side, sorter=sorter)
2217+
result = self._values.searchsorted(Series(value)._values,
2218+
side=side, sorter=sorter)
2219+
2220+
return result[0] if is_scalar(value) else result
22192221

22202222
# -------------------------------------------------------------------
22212223
# Combination

pandas/tests/arrays/categorical/test_analytics.py

+2
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,11 @@ def test_searchsorted(self):
8686
# Searching for single item argument, side='left' (default)
8787
res_cat = c1.searchsorted('apple')
8888
assert res_cat == 2
89+
assert tm.is_scalar(res_cat)
8990

9091
res_ser = s1.searchsorted('apple')
9192
assert res_ser == 2
93+
assert tm.is_scalar(res_ser)
9294

9395
# Searching for single item array, side='left' (default)
9496
res_cat = c1.searchsorted(['bread'])

pandas/tests/series/test_analytics.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -1356,17 +1356,19 @@ def test_numpy_repeat(self):
13561356
def test_searchsorted(self):
13571357
s = Series([1, 2, 3])
13581358

1359-
idx = s.searchsorted(1, side='left')
1360-
tm.assert_numpy_array_equal(idx, np.array([0], dtype=np.intp))
1359+
result = s.searchsorted(1, side='left')
1360+
assert tm.is_scalar(result)
1361+
assert result == 0
13611362

1362-
idx = s.searchsorted(1, side='right')
1363-
tm.assert_numpy_array_equal(idx, np.array([1], dtype=np.intp))
1363+
result = s.searchsorted(1, side='right')
1364+
assert tm.is_scalar(result)
1365+
assert result == 1
13641366

13651367
def test_searchsorted_numeric_dtypes_scalar(self):
13661368
s = Series([1, 2, 90, 1000, 3e9])
13671369
r = s.searchsorted(30)
1368-
e = 2
1369-
assert r == e
1370+
assert tm.is_scalar(r)
1371+
assert r == 2
13701372

13711373
r = s.searchsorted([30])
13721374
e = np.array([2], dtype=np.intp)
@@ -1382,8 +1384,8 @@ def test_search_sorted_datetime64_scalar(self):
13821384
s = Series(pd.date_range('20120101', periods=10, freq='2D'))
13831385
v = pd.Timestamp('20120102')
13841386
r = s.searchsorted(v)
1385-
e = 1
1386-
assert r == e
1387+
assert tm.is_scalar(r)
1388+
assert r == 1
13871389

13881390
def test_search_sorted_datetime64_list(self):
13891391
s = Series(pd.date_range('20120101', periods=10, freq='2D'))

pandas/util/testing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
is_bool, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
2828
is_datetimelike_v_numeric, is_datetimelike_v_object,
2929
is_extension_array_dtype, is_interval_dtype, is_list_like, is_number,
30-
is_period_dtype, is_sequence, is_timedelta64_dtype, needs_i8_conversion)
30+
is_period_dtype, is_scalar, is_sequence, is_timedelta64_dtype,
31+
needs_i8_conversion) # noqa
3132
from pandas.core.dtypes.missing import array_equivalent
3233

3334
import pandas as pd

0 commit comments

Comments
 (0)