From bcf2638dba288756bf5c657146ab1a5580a3f1ff Mon Sep 17 00:00:00 2001
From: Kiley Hewitt <42876297+hewittk@users.noreply.github.com>
Date: Fri, 25 Jun 2021 03:23:13 -0600
Subject: [PATCH] Backport PR #40628: ENH: New boundary inputs

Series.between now takes ``inclusive`` as one of "both", "neither",
"left" or "right" (default "both") to choose which bounds are closed.

Passing the booleans True/False still works but emits a FutureWarning;
True is mapped to "both" and False to "neither". Any other value raises
ValueError.

The adjacent string literals that build the warning and error messages
carry an explicit separating space so the rendered text reads
"deprecated in favour of" and "'both', 'left', ..."; the test that
matches the error message uses the same text.
---
 doc/source/whatsnew/v1.3.0.rst              |  2 +
 pandas/core/series.py                       | 33 +++++++++++---
 pandas/tests/series/methods/test_between.py | 48 ++++++++++++++++++++-
 3 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index b92e414f2055e..cb8df16d6c0fb 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -276,6 +276,7 @@ Other enhancements
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
 - :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)
 - Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
+- :meth:`Series.between` can now accept ``left`` or ``right`` as arguments to ``inclusive`` to include only the left or right boundary (:issue:`40245`)
 - :meth:`DataFrame.explode` now supports exploding multiple columns. Its ``column`` argument now also accepts a list of str or tuples for exploding on multiple columns at the same time (:issue:`39240`)
 
 .. ---------------------------------------------------------------------------
@@ -838,6 +839,7 @@ Other Deprecations
 - Deprecated inference of ``timedelta64[ns]``, ``datetime64[ns]``, or ``DatetimeTZDtype`` dtypes in :class:`Series` construction when data containing strings is passed and no ``dtype`` is passed (:issue:`33558`)
 - In a future version, constructing :class:`Series` or :class:`DataFrame` with ``datetime64[ns]`` data and ``DatetimeTZDtype`` will treat the data as wall-times instead of as UTC times (matching DatetimeIndex behavior). To treat the data as UTC times, use ``pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(dtype.tz)`` or ``pd.Series(data.view("int64"), dtype=dtype)`` (:issue:`33401`)
 - Deprecated passing lists as ``key`` to :meth:`DataFrame.xs` and :meth:`Series.xs` (:issue:`41760`)
+- Deprecated boolean arguments of ``inclusive`` in :meth:`Series.between` to have ``{"left", "right", "neither", "both"}`` as standard argument values (:issue:`40628`)
 - Deprecated passing arguments as positional for all of the following, with exceptions noted (:issue:`41485`):
 
   - :func:`concat` (other than ``objs``)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 24f433db45e60..c1722718364d3 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4967,7 +4967,7 @@ def isin(self, values) -> Series:
             self, method="isin"
         )
 
-    def between(self, left, right, inclusive=True) -> Series:
+    def between(self, left, right, inclusive="both") -> Series:
         """
         Return boolean Series equivalent to left <= series <= right.
 
@@ -4981,8 +4981,9 @@ def between(self, left, right, inclusive=True) -> Series:
             Left boundary.
         right : scalar or list-like
             Right boundary.
-        inclusive : bool, default True
-            Include boundaries.
+        inclusive : {"both", "neither", "left", "right"}
+            Include boundaries. Whether to set each bound as closed or open.
+            .. versionchanged:: 1.3.0
 
         Returns
         -------
@@ -5033,12 +5034,34 @@ def between(self, left, right, inclusive=True) -> Series:
         3    False
         dtype: bool
         """
-        if inclusive:
+        if inclusive is True or inclusive is False:
+            warnings.warn(
+                "Boolean inputs to the `inclusive` argument are deprecated in "
+                "favour of `both` or `neither`.",
+                FutureWarning,
+                stacklevel=2,
+            )
+            if inclusive:
+                inclusive = "both"
+            else:
+                inclusive = "neither"
+        if inclusive == "both":
             lmask = self >= left
             rmask = self <= right
-        else:
+        elif inclusive == "left":
+            lmask = self >= left
+            rmask = self < right
+        elif inclusive == "right":
+            lmask = self > left
+            rmask = self <= right
+        elif inclusive == "neither":
             lmask = self > left
             rmask = self < right
+        else:
+            raise ValueError(
+                "Inclusive has to be either string of 'both', "
+                "'left', 'right', or 'neither'."
+            )
 
         return lmask & rmask
 
diff --git a/pandas/tests/series/methods/test_between.py b/pandas/tests/series/methods/test_between.py
index 381c733619c6b..9c11b71e4bee6 100644
--- a/pandas/tests/series/methods/test_between.py
+++ b/pandas/tests/series/methods/test_between.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 from pandas import (
     Series,
@@ -28,7 +29,7 @@ def test_between_datetime_values(self):
         expected = ser[3:18].dropna()
         tm.assert_series_equal(result, expected)
 
-        result = ser[ser.between(ser[3], ser[17], inclusive=False)]
+        result = ser[ser.between(ser[3], ser[17], inclusive="neither")]
         expected = ser[5:16].dropna()
         tm.assert_series_equal(result, expected)
 
@@ -38,3 +39,48 @@ def test_between_period_values(self):
         result = ser.between(left, right)
         expected = (ser >= left) & (ser <= right)
         tm.assert_series_equal(result, expected)
+
+    def test_between_inclusive_string(self):  # :issue:`40628`
+        series = Series(date_range("1/1/2000", periods=10))
+        left, right = series[[2, 7]]
+
+        result = series.between(left, right, inclusive="both")
+        expected = (series >= left) & (series <= right)
+        tm.assert_series_equal(result, expected)
+
+        result = series.between(left, right, inclusive="left")
+        expected = (series >= left) & (series < right)
+        tm.assert_series_equal(result, expected)
+
+        result = series.between(left, right, inclusive="right")
+        expected = (series > left) & (series <= right)
+        tm.assert_series_equal(result, expected)
+
+        result = series.between(left, right, inclusive="neither")
+        expected = (series > left) & (series < right)
+        tm.assert_series_equal(result, expected)
+
+    def test_between_error_args(self):  # :issue:`40628`
+        series = Series(date_range("1/1/2000", periods=10))
+        left, right = series[[2, 7]]
+
+        value_error_msg = (
+            "Inclusive has to be either string of 'both', "
+            "'left', 'right', or 'neither'."
+        )
+
+        with pytest.raises(ValueError, match=value_error_msg):
+            series = Series(date_range("1/1/2000", periods=10))
+            series.between(left, right, inclusive="yes")
+
+    def test_between_inclusive_warning(self):
+        series = Series(date_range("1/1/2000", periods=10))
+        left, right = series[[2, 7]]
+        with tm.assert_produces_warning(FutureWarning):
+            result = series.between(left, right, inclusive=False)
+        expected = (series > left) & (series < right)
+        tm.assert_series_equal(result, expected)
+        with tm.assert_produces_warning(FutureWarning):
+            result = series.between(left, right, inclusive=True)
+        expected = (series >= left) & (series <= right)
+        tm.assert_series_equal(result, expected)