diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 69ad20269319f..f19b0fe10fe6e 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -108,6 +108,7 @@ Other enhancements - Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`) - Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`) + .. --------------------------------------------------------------------------- .. _whatsnew_140.notable_bug_fixes: @@ -278,6 +279,7 @@ Other Deprecations - Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`) - Deprecated :meth:`.Styler.render` in favour of :meth:`.Styler.to_html` (:issue:`42140`) - Deprecated passing in a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`) +- Deprecated the 'include_start' and 'include_end' arguments in :meth:`DataFrame.between_time`; in a future version passing 'include_start' or 'include_end' will raise (:issue:`40245`) - Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b8485864704dd..67eb457ab93b1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7606,8 +7606,9 @@ def between_time( self: FrameOrSeries, start_time, end_time, - include_start: bool_t = True, - include_end: bool_t = True, + include_start: bool_t | lib.NoDefault = lib.no_default, + include_end: bool_t | lib.NoDefault = lib.no_default, + inclusive: str | None = None, axis=None, ) -> FrameOrSeries: """ @@ -7624,8 +7625,20 @@ def between_time( End time as a time filter limit. include_start : bool, default True Whether the start time needs to be included in the result. + + .. deprecated:: 1.4.0 + Arguments `include_start` and `include_end` have been deprecated + to standardize boundary inputs. Use `inclusive` instead, to set + each bound as closed or open. include_end : bool, default True Whether the end time needs to be included in the result. + + .. deprecated:: 1.4.0 + Arguments `include_start` and `include_end` have been deprecated + to standardize boundary inputs. Use `inclusive` instead, to set + each bound as closed or open. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; whether to set each bound as closed or open. axis : {0 or 'index', 1 or 'columns'}, default 0 Determine range time on index or columns value. @@ -7679,8 +7692,46 @@ def between_time( if not isinstance(index, DatetimeIndex): raise TypeError("Index must be DatetimeIndex") + if (include_start != lib.no_default or include_end != lib.no_default) and ( + inclusive is not None + ): + raise ValueError( + "Deprecated arguments `include_start` and `include_end` " + "cannot be passed if `inclusive` has been given." + ) + # If any of the deprecated arguments ('include_start', 'include_end') + # have been passed + elif (include_start != lib.no_default) or (include_end != lib.no_default): + warnings.warn( + "`include_start` and `include_end` are deprecated in " + "favour of `inclusive`.", + FutureWarning, + stacklevel=2, + ) + left = True if isinstance(include_start, lib.NoDefault) else include_start + right = True if isinstance(include_end, lib.NoDefault) else include_end + + inc_dict = { + (True, True): "both", + (True, False): "left", + (False, True): "right", + (False, False): "neither", + } + inclusive = inc_dict[(left, right)] + else: # On arg removal inclusive can default to "both" + if inclusive is None: + inclusive = "both" + elif inclusive not in ["both", "neither", "left", "right"]: + raise ValueError( + f"Inclusive has to be either string of 'both', " + f"'left', 'right', or 'neither'. Got {inclusive}." + ) + indexer = index.indexer_between_time( - start_time, end_time, include_start=include_start, include_end=include_end + start_time, + end_time, + include_start=inclusive in ["both", "left"], + include_end=inclusive in ["both", "right"], ) return self._take_with_is_copy(indexer, axis=axis) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 7d485ee62c7d2..92f81ff3a00fa 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -1,5 +1,3 @@ -from itertools import product - import numpy as np import pytest @@ -11,8 +9,8 @@ import pandas._testing as tm -@pytest.fixture(params=product([True, False], [True, False])) -def close_open_fixture(request): +@pytest.fixture(params=["both", "neither", "left", "right"]) +def inclusive_endpoints_fixture(request): return request.param diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index 0daa267767269..ae1aaaaf75d9b 100644 --- a/pandas/tests/frame/methods/test_between_time.py +++ b/pandas/tests/frame/methods/test_between_time.py @@ -69,7 +69,7 @@ def test_between_time_types(self, frame_or_series): with pytest.raises(ValueError, match=msg): obj.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - def test_between_time(self, close_open_fixture, frame_or_series): + def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") ts = DataFrame(np.random.randn(len(rng), 2), index=rng) if frame_or_series is not DataFrame: @@ -77,24 +77,25 @@ def test_between_time(self, close_open_fixture, frame_or_series): stime = time(0, 0) etime = time(1, 0) - inc_start, inc_end = close_open_fixture + inclusive = inclusive_endpoints_fixture - filtered = ts.between_time(stime, etime, inc_start, inc_end) + filtered = ts.between_time(stime, etime, inclusive=inclusive) exp_len = 13 * 4 + 1 - if not inc_start: + + if inclusive in ["right", "neither"]: exp_len -= 5 - if not inc_end: + if inclusive in ["left", "neither"]: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() - if inc_start: + if inclusive in ["left", "both"]: assert t >= stime else: assert t > stime - if inc_end: + if inclusive in ["right", "both"]: assert t <= etime else: assert t < etime @@ -111,22 +112,22 @@ def test_between_time(self, close_open_fixture, frame_or_series): stime = time(22, 0) etime = time(9, 0) - filtered = ts.between_time(stime, etime, inc_start, inc_end) + filtered = ts.between_time(stime, etime, inclusive=inclusive) exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: + if inclusive in ["right", "neither"]: exp_len -= 4 - if not inc_end: + if inclusive in ["left", "neither"]: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() - if inc_start: + if inclusive in ["left", "both"]: assert (t >= stime) or (t <= etime) else: assert (t > stime) or (t <= etime) - if inc_end: + if inclusive in ["right", "both"]: assert (t <= etime) or (t >= stime) else: assert (t < etime) or (t >= stime) @@ -207,3 +208,91 @@ def test_between_time_datetimeindex(self): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result, expected2) assert len(result) == 12 + + @pytest.mark.parametrize("include_start", [True, False]) + @pytest.mark.parametrize("include_end", [True, False]) + def test_between_time_warn(self, include_start, include_end, frame_or_series): + # GH40245 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + if frame_or_series is not DataFrame: + ts = ts[0] + + stime = time(0, 0) + etime = time(1, 0) + + match = ( + "`include_start` and `include_end` " + "are deprecated in favour of `inclusive`." + ) + with tm.assert_produces_warning(FutureWarning, match=match): + _ = ts.between_time(stime, etime, include_start, include_end) + + def test_between_time_incorr_arg_inclusive(self): + # GH40245 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + + stime = time(0, 0) + etime = time(1, 0) + inclusive = "bad_string" + msg = ( + "Inclusive has to be either string of 'both', 'left', 'right', " + "or 'neither'. Got bad_string." + ) + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, inclusive=inclusive) + + @pytest.mark.parametrize( + "include_start, include_end", [(True, None), (True, True), (None, True)] + ) + def test_between_time_incompatiable_args_given(self, include_start, include_end): + # GH40245 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + + stime = time(0, 0) + etime = time(1, 0) + msg = ( + "Deprecated arguments `include_start` and `include_end` cannot be " + "passed if `inclusive` has been given." + ) + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, include_start, include_end, inclusive="left") + + def test_between_time_same_functionality_old_and_new_args(self): + # GH40245 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(0, 0) + etime = time(1, 0) + match = ( + "`include_start` and `include_end` " + "are deprecated in favour of `inclusive`." + ) + + result = ts.between_time(stime, etime) + expected = ts.between_time(stime, etime, inclusive="both") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time(stime, etime, include_start=False) + expected = ts.between_time(stime, etime, inclusive="right") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time(stime, etime, include_end=False) + expected = ts.between_time(stime, etime, inclusive="left") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time( + stime, etime, include_start=False, include_end=False + ) + expected = ts.between_time(stime, etime, inclusive="neither") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=match): + result = ts.between_time(stime, etime, include_start=True, include_end=True) + expected = ts.between_time(stime, etime, inclusive="both") + tm.assert_frame_equal(result, expected)