From bf8ec16c5128b3c4443cd010ca9af43924b3ead1 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 20 Nov 2018 00:06:56 +0000 Subject: [PATCH 1/4] API: Make Series.searchsorted return a scalar, when supplied a scalar --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/base.py | 18 ++++++++++++------ pandas/core/series.py | 6 ++++-- .../tests/arrays/categorical/test_analytics.py | 2 ++ pandas/tests/series/test_analytics.py | 18 ++++++++++-------- pandas/util/testing.py | 3 ++- 6 files changed, 31 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index fe5e4a57c557a..6938065115f1f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1096,6 +1096,7 @@ Other API Changes has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`) - Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) - :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`) +- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`). - :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`). - :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather that a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`). - :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`). diff --git a/pandas/core/base.py b/pandas/core/base.py index 928e90977f95b..f44ae7feb4973 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1338,8 +1338,8 @@ def factorize(self, sort=False, na_sentinel=-1): Parameters ---------- - value : array_like - Values to insert into `self`. + value : scalar or array_like + Value(s) to insert into `self`. side : {'left', 'right'}, optional If 'left', the index of the first suitable location found is given. If 'right', return the last such index. If there is no suitable @@ -1350,8 +1350,14 @@ def factorize(self, sort=False, na_sentinel=-1): Returns ------- - indices : array of ints - Array of insertion points with the same shape as `value`. + int or array of ints + A scalar or array of insertion points with the + same shape as `value`. + + .. versionchanged :: 0.24.0 + Ìf `value`is a scalar, an int is now always returned. + Previously, scalar inputs returned an 1-item array for + :class:`Series` and :class:`Categorical`. See Also -------- @@ -1372,7 +1378,7 @@ def factorize(self, sort=False, na_sentinel=-1): dtype: int64 >>> x.searchsorted(4) - array([3]) + 3 >>> x.searchsorted([0, 4]) array([0, 3]) @@ -1389,7 +1395,7 @@ def factorize(self, sort=False, na_sentinel=-1): Categories (4, object): [apple < bread < cheese < milk] >>> x.searchsorted('bread') - array([1]) # Note: an array, not a scalar + 1 >>> x.searchsorted(['bread'], side='right') array([3]) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9ba9cdc818a5e..ec02a52c72c5d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2215,8 +2215,10 @@ def __rmatmul__(self, other): def searchsorted(self, value, side='left', sorter=None): if sorter is not None: sorter = ensure_platform_int(sorter) - return self._values.searchsorted(Series(value)._values, - side=side, sorter=sorter) + result = self._values.searchsorted(Series(value)._values, + side=side, sorter=sorter) + + return result[0] if is_scalar(value) else result # ------------------------------------------------------------------- # Combination diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 4251273e424dd..bd1c64fc0c93b 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -86,9 +86,11 @@ def test_searchsorted(self): # Searching for single item argument, side='left' (default) res_cat = c1.searchsorted('apple') assert res_cat == 2 + assert tm.is_scalar(res_cat) res_ser = s1.searchsorted('apple') assert res_ser == 2 + assert tm.is_scalar(res_ser) # Searching for single item array, side='left' (default) res_cat = c1.searchsorted(['bread']) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 81d60aba44b0f..2b0f18aaad899 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1364,17 +1364,19 @@ def test_numpy_repeat(self): def test_searchsorted(self): s = Series([1, 2, 3]) - idx = s.searchsorted(1, side='left') - tm.assert_numpy_array_equal(idx, np.array([0], dtype=np.intp)) + result = s.searchsorted(1, side='left') + assert tm.is_scalar(result) + assert result == 0 - idx = s.searchsorted(1, side='right') - tm.assert_numpy_array_equal(idx, np.array([1], dtype=np.intp)) + result = s.searchsorted(1, side='right') + assert tm.is_scalar(result) + assert result == 1 def test_searchsorted_numeric_dtypes_scalar(self): s = Series([1, 2, 90, 1000, 3e9]) r = s.searchsorted(30) - e = 2 - assert r == e + assert tm.is_scalar(r) + assert r == 2 r = s.searchsorted([30]) e = np.array([2], dtype=np.intp) @@ -1390,8 +1392,8 @@ def test_search_sorted_datetime64_scalar(self): s = Series(pd.date_range('20120101', periods=10, freq='2D')) v = pd.Timestamp('20120102') r = s.searchsorted(v) - e = 1 - assert r == e + assert tm.is_scalar(r) + assert r == 1 def test_search_sorted_datetime64_list(self): s = Series(pd.date_range('20120101', periods=10, freq='2D')) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 8a408f5613a01..4d9b9ccf0f074 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -27,7 +27,8 @@ is_bool, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_datetimelike_v_numeric, is_datetimelike_v_object, is_extension_array_dtype, is_interval_dtype, is_list_like, is_number, - is_period_dtype, is_sequence, is_timedelta64_dtype, needs_i8_conversion) + is_period_dtype, is_scalar, is_sequence, is_timedelta64_dtype, + needs_i8_conversion) # noqa from pandas.core.dtypes.missing import array_equivalent import pandas as pd From e8aef0b450d821911dc4dc5670662fcce44487fd Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 20 Nov 2018 04:47:47 +0000 Subject: [PATCH 2/4] Adjust according to comments --- pandas/core/base.py | 2 +- pandas/tests/indexes/multi/test_monotonic.py | 7 +++++++ pandas/util/testing.py | 4 ++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f44ae7feb4973..1c0329244cad8 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1355,7 +1355,7 @@ def factorize(self, sort=False, na_sentinel=-1): same shape as `value`. .. versionchanged :: 0.24.0 - Ìf `value`is a scalar, an int is now always returned. + If `value` is a scalar, an int is now always returned. Previously, scalar inputs returned an 1-item array for :class:`Series` and :class:`Categorical`. diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py index 3c7db70b7e242..9c4b65ff374dc 100644 --- a/pandas/tests/indexes/multi/test_monotonic.py +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -5,6 +5,7 @@ import pandas as pd from pandas import Index, IntervalIndex, MultiIndex +import pandas.util.testing as tm def test_is_monotonic_increasing(): @@ -182,22 +183,28 @@ def test_searchsorted_monotonic(indices): # test searchsorted only for increasing if indices.is_monotonic_increasing: ssm_left = indices._searchsorted_monotonic(value, side='left') + assert tm.is_scalar(ssm_left) assert expected_left == ssm_left ssm_right = indices._searchsorted_monotonic(value, side='right') + assert tm.is_scalar(ssm_right) assert expected_right == ssm_right ss_left = indices.searchsorted(value, side='left') + assert tm.is_scalar(ss_left) assert expected_left == ss_left ss_right = indices.searchsorted(value, side='right') + assert tm.is_scalar(ss_right) assert expected_right == ss_right elif indices.is_monotonic_decreasing: ssm_left = indices._searchsorted_monotonic(value, side='left') + assert tm.is_scalar(ssm_left) assert expected_left == ssm_left ssm_right = indices._searchsorted_monotonic(value, side='right') + assert tm.is_scalar(ssm_right) assert expected_right == ssm_right else: diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 4d9b9ccf0f074..14a2ce72a8cb9 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -27,8 +27,8 @@ is_bool, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_datetimelike_v_numeric, is_datetimelike_v_object, is_extension_array_dtype, is_interval_dtype, is_list_like, is_number, - is_period_dtype, is_scalar, is_sequence, is_timedelta64_dtype, - needs_i8_conversion) # noqa + is_period_dtype, is_sequence, is_timedelta64_dtype, needs_i8_conversion) +from pandas.core.dtypes.common import is_scalar # noqa: F401 from pandas.core.dtypes.missing import array_equivalent import pandas as pd From b59e640cc78e6ad8e77b739c38f160dda27ddf87 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 9 Dec 2018 23:56:00 +0000 Subject: [PATCH 3/4] adjust for comments --- pandas/core/base.py | 2 +- pandas/tests/arrays/categorical/test_analytics.py | 5 +++-- pandas/tests/indexes/multi/test_monotonic.py | 14 +++++++------- pandas/tests/series/test_analytics.py | 9 +++++---- pandas/util/testing.py | 1 - 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 1c0329244cad8..86bb2eb8823f8 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1339,7 +1339,7 @@ def factorize(self, sort=False, na_sentinel=-1): Parameters ---------- value : scalar or array_like - Value(s) to insert into `self`. + Value(s) to insert into `self`.t side : {'left', 'right'}, optional If 'left', the index of the first suitable location found is given. If 'right', return the last such index. If there is no suitable diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index bd1c64fc0c93b..b2c9151e1fa94 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -8,6 +8,7 @@ from pandas.compat import PYPY from pandas import Categorical, Index, Series +from pandas.api.types import is_scalar import pandas.util.testing as tm @@ -86,11 +87,11 @@ def test_searchsorted(self): # Searching for single item argument, side='left' (default) res_cat = c1.searchsorted('apple') assert res_cat == 2 - assert tm.is_scalar(res_cat) + assert is_scalar(res_cat) res_ser = s1.searchsorted('apple') assert res_ser == 2 - assert tm.is_scalar(res_ser) + assert is_scalar(res_ser) # Searching for single item array, side='left' (default) res_cat = c1.searchsorted(['bread']) diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py index 9c4b65ff374dc..72e9bcc1e2eb1 100644 --- a/pandas/tests/indexes/multi/test_monotonic.py +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -5,7 +5,7 @@ import pandas as pd from pandas import Index, IntervalIndex, MultiIndex -import pandas.util.testing as tm +from pandas.api.types import is_scalar def test_is_monotonic_increasing(): @@ -183,28 +183,28 @@ def test_searchsorted_monotonic(indices): # test searchsorted only for increasing if indices.is_monotonic_increasing: ssm_left = indices._searchsorted_monotonic(value, side='left') - assert tm.is_scalar(ssm_left) + assert is_scalar(ssm_left) assert expected_left == ssm_left ssm_right = indices._searchsorted_monotonic(value, side='right') - assert tm.is_scalar(ssm_right) + assert is_scalar(ssm_right) assert expected_right == ssm_right ss_left = indices.searchsorted(value, side='left') - assert tm.is_scalar(ss_left) + assert is_scalar(ss_left) assert expected_left == ss_left ss_right = indices.searchsorted(value, side='right') - assert tm.is_scalar(ss_right) + assert is_scalar(ss_right) assert expected_right == ss_right elif indices.is_monotonic_decreasing: ssm_left = indices._searchsorted_monotonic(value, side='left') - assert tm.is_scalar(ssm_left) + assert is_scalar(ssm_left) assert expected_left == ssm_left ssm_right = indices._searchsorted_monotonic(value, side='right') - assert tm.is_scalar(ssm_right) + assert is_scalar(ssm_right) assert expected_right == ssm_right else: diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 2b0f18aaad899..ab40bdc3ad6a0 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -16,6 +16,7 @@ from pandas import ( Categorical, CategoricalIndex, DataFrame, Series, bdate_range, compat, date_range, isna, notna) +from pandas.api.types import is_scalar from pandas.core.index import MultiIndex from pandas.core.indexes.datetimes import Timestamp from pandas.core.indexes.timedeltas import Timedelta @@ -1365,17 +1366,17 @@ def test_searchsorted(self): s = Series([1, 2, 3]) result = s.searchsorted(1, side='left') - assert tm.is_scalar(result) + assert is_scalar(result) assert result == 0 result = s.searchsorted(1, side='right') - assert tm.is_scalar(result) + assert is_scalar(result) assert result == 1 def test_searchsorted_numeric_dtypes_scalar(self): s = Series([1, 2, 90, 1000, 3e9]) r = s.searchsorted(30) - assert tm.is_scalar(r) + assert is_scalar(r) assert r == 2 r = s.searchsorted([30]) @@ -1392,7 +1393,7 @@ def test_search_sorted_datetime64_scalar(self): s = Series(pd.date_range('20120101', periods=10, freq='2D')) v = pd.Timestamp('20120102') r = s.searchsorted(v) - assert tm.is_scalar(r) + assert is_scalar(r) assert r == 1 def test_search_sorted_datetime64_list(self): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 14a2ce72a8cb9..8a408f5613a01 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -28,7 +28,6 @@ is_datetimelike_v_numeric, is_datetimelike_v_object, is_extension_array_dtype, is_interval_dtype, is_list_like, is_number, is_period_dtype, is_sequence, is_timedelta64_dtype, needs_i8_conversion) -from pandas.core.dtypes.common import is_scalar # noqa: F401 from pandas.core.dtypes.missing import array_equivalent import pandas as pd From c09a4117be7e763680091cd5180f92d1c34dcd0c Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 18 Dec 2018 14:56:26 +0000 Subject: [PATCH 4/4] adjust for comments --- pandas/core/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 86bb2eb8823f8..f3aa18674439b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1338,8 +1338,8 @@ def factorize(self, sort=False, na_sentinel=-1): Parameters ---------- - value : scalar or array_like - Value(s) to insert into `self`.t + value : array_like + Values to insert into `self`. side : {'left', 'right'}, optional If 'left', the index of the first suitable location found is given. If 'right', return the last such index. If there is no suitable @@ -1350,7 +1350,7 @@ def factorize(self, sort=False, na_sentinel=-1): Returns ------- - int or array of ints + int or array of int A scalar or array of insertion points with the same shape as `value`.