Skip to content

Commit df1de66

Browse files
ENH: consistency of input args for boundaries in DataFrame.between_time() #40245 (#43248)
1 parent 449eaae commit df1de66

File tree

4 files changed

+159
-19
lines changed

4 files changed

+159
-19
lines changed

doc/source/whatsnew/v1.4.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ Other enhancements
108108
- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
109109
- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
110110

111+
111112
.. ---------------------------------------------------------------------------
112113
113114
.. _whatsnew_140.notable_bug_fixes:
@@ -278,6 +279,7 @@ Other Deprecations
278279
- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
279280
- Deprecated :meth:`.Styler.render` in favour of :meth:`.Styler.to_html` (:issue:`42140`)
280281
- Deprecated passing in a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
282+
- Deprecated the 'include_start' and 'include_end' arguments in :meth:`DataFrame.between_time`; in a future version passing 'include_start' or 'include_end' will raise (:issue:`40245`)
281283
- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)
282284

283285
.. ---------------------------------------------------------------------------

pandas/core/generic.py

+54-3
Original file line numberDiff line numberDiff line change
@@ -7606,8 +7606,9 @@ def between_time(
76067606
self: FrameOrSeries,
76077607
start_time,
76087608
end_time,
7609-
include_start: bool_t = True,
7610-
include_end: bool_t = True,
7609+
include_start: bool_t | lib.NoDefault = lib.no_default,
7610+
include_end: bool_t | lib.NoDefault = lib.no_default,
7611+
inclusive: str | None = None,
76117612
axis=None,
76127613
) -> FrameOrSeries:
76137614
"""
@@ -7624,8 +7625,20 @@ def between_time(
76247625
End time as a time filter limit.
76257626
include_start : bool, default True
76267627
Whether the start time needs to be included in the result.
7628+
7629+
.. deprecated:: 1.4.0
7630+
Arguments `include_start` and `include_end` have been deprecated
7631+
to standardize boundary inputs. Use `inclusive` instead, to set
7632+
each bound as closed or open.
76277633
include_end : bool, default True
76287634
Whether the end time needs to be included in the result.
7635+
7636+
.. deprecated:: 1.4.0
7637+
Arguments `include_start` and `include_end` have been deprecated
7638+
to standardize boundary inputs. Use `inclusive` instead, to set
7639+
each bound as closed or open.
7640+
inclusive : {"both", "neither", "left", "right"}, default "both"
7641+
Include boundaries; whether to set each bound as closed or open.
76297642
axis : {0 or 'index', 1 or 'columns'}, default 0
76307643
Determine range time on index or columns value.
76317644
@@ -7679,8 +7692,46 @@ def between_time(
76797692
if not isinstance(index, DatetimeIndex):
76807693
raise TypeError("Index must be DatetimeIndex")
76817694

7695+
if (include_start != lib.no_default or include_end != lib.no_default) and (
7696+
inclusive is not None
7697+
):
7698+
raise ValueError(
7699+
"Deprecated arguments `include_start` and `include_end` "
7700+
"cannot be passed if `inclusive` has been given."
7701+
)
7702+
# If any of the deprecated arguments ('include_start', 'include_end')
7703+
# have been passed
7704+
elif (include_start != lib.no_default) or (include_end != lib.no_default):
7705+
warnings.warn(
7706+
"`include_start` and `include_end` are deprecated in "
7707+
"favour of `inclusive`.",
7708+
FutureWarning,
7709+
stacklevel=2,
7710+
)
7711+
left = True if isinstance(include_start, lib.NoDefault) else include_start
7712+
right = True if isinstance(include_end, lib.NoDefault) else include_end
7713+
7714+
inc_dict = {
7715+
(True, True): "both",
7716+
(True, False): "left",
7717+
(False, True): "right",
7718+
(False, False): "neither",
7719+
}
7720+
inclusive = inc_dict[(left, right)]
7721+
else: # On arg removal inclusive can default to "both"
7722+
if inclusive is None:
7723+
inclusive = "both"
7724+
elif inclusive not in ["both", "neither", "left", "right"]:
7725+
raise ValueError(
7726+
f"Inclusive has to be either string of 'both', "
7727+
f"'left', 'right', or 'neither'. Got {inclusive}."
7728+
)
7729+
76827730
indexer = index.indexer_between_time(
7683-
start_time, end_time, include_start=include_start, include_end=include_end
7731+
start_time,
7732+
end_time,
7733+
include_start=inclusive in ["both", "left"],
7734+
include_end=inclusive in ["both", "right"],
76847735
)
76857736
return self._take_with_is_copy(indexer, axis=axis)
76867737

pandas/tests/frame/conftest.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from itertools import product
2-
31
import numpy as np
42
import pytest
53

@@ -11,8 +9,8 @@
119
import pandas._testing as tm
1210

1311

14-
@pytest.fixture(params=product([True, False], [True, False]))
15-
def close_open_fixture(request):
12+
@pytest.fixture(params=["both", "neither", "left", "right"])
13+
def inclusive_endpoints_fixture(request):
1614
return request.param
1715

1816

pandas/tests/frame/methods/test_between_time.py

+101-12
Original file line numberDiff line numberDiff line change
@@ -69,32 +69,33 @@ def test_between_time_types(self, frame_or_series):
6969
with pytest.raises(ValueError, match=msg):
7070
obj.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
7171

72-
def test_between_time(self, close_open_fixture, frame_or_series):
72+
def test_between_time(self, inclusive_endpoints_fixture, frame_or_series):
7373
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
7474
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
7575
if frame_or_series is not DataFrame:
7676
ts = ts[0]
7777

7878
stime = time(0, 0)
7979
etime = time(1, 0)
80-
inc_start, inc_end = close_open_fixture
80+
inclusive = inclusive_endpoints_fixture
8181

82-
filtered = ts.between_time(stime, etime, inc_start, inc_end)
82+
filtered = ts.between_time(stime, etime, inclusive=inclusive)
8383
exp_len = 13 * 4 + 1
84-
if not inc_start:
84+
85+
if inclusive in ["right", "neither"]:
8586
exp_len -= 5
86-
if not inc_end:
87+
if inclusive in ["left", "neither"]:
8788
exp_len -= 4
8889

8990
assert len(filtered) == exp_len
9091
for rs in filtered.index:
9192
t = rs.time()
92-
if inc_start:
93+
if inclusive in ["left", "both"]:
9394
assert t >= stime
9495
else:
9596
assert t > stime
9697

97-
if inc_end:
98+
if inclusive in ["right", "both"]:
9899
assert t <= etime
99100
else:
100101
assert t < etime
@@ -111,22 +112,22 @@ def test_between_time(self, close_open_fixture, frame_or_series):
111112
stime = time(22, 0)
112113
etime = time(9, 0)
113114

114-
filtered = ts.between_time(stime, etime, inc_start, inc_end)
115+
filtered = ts.between_time(stime, etime, inclusive=inclusive)
115116
exp_len = (12 * 11 + 1) * 4 + 1
116-
if not inc_start:
117+
if inclusive in ["right", "neither"]:
117118
exp_len -= 4
118-
if not inc_end:
119+
if inclusive in ["left", "neither"]:
119120
exp_len -= 4
120121

121122
assert len(filtered) == exp_len
122123
for rs in filtered.index:
123124
t = rs.time()
124-
if inc_start:
125+
if inclusive in ["left", "both"]:
125126
assert (t >= stime) or (t <= etime)
126127
else:
127128
assert (t > stime) or (t <= etime)
128129

129-
if inc_end:
130+
if inclusive in ["right", "both"]:
130131
assert (t <= etime) or (t >= stime)
131132
else:
132133
assert (t < etime) or (t >= stime)
@@ -207,3 +208,91 @@ def test_between_time_datetimeindex(self):
207208
tm.assert_frame_equal(result, expected)
208209
tm.assert_frame_equal(result, expected2)
209210
assert len(result) == 12
211+
212+
@pytest.mark.parametrize("include_start", [True, False])
213+
@pytest.mark.parametrize("include_end", [True, False])
214+
def test_between_time_warn(self, include_start, include_end, frame_or_series):
215+
# GH40245
216+
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
217+
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
218+
if frame_or_series is not DataFrame:
219+
ts = ts[0]
220+
221+
stime = time(0, 0)
222+
etime = time(1, 0)
223+
224+
match = (
225+
"`include_start` and `include_end` "
226+
"are deprecated in favour of `inclusive`."
227+
)
228+
with tm.assert_produces_warning(FutureWarning, match=match):
229+
_ = ts.between_time(stime, etime, include_start, include_end)
230+
231+
def test_between_time_incorr_arg_inclusive(self):
232+
# GH40245
233+
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
234+
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
235+
236+
stime = time(0, 0)
237+
etime = time(1, 0)
238+
inclusive = "bad_string"
239+
msg = (
240+
"Inclusive has to be either string of 'both', 'left', 'right', "
241+
"or 'neither'. Got bad_string."
242+
)
243+
with pytest.raises(ValueError, match=msg):
244+
ts.between_time(stime, etime, inclusive=inclusive)
245+
246+
@pytest.mark.parametrize(
247+
"include_start, include_end", [(True, None), (True, True), (None, True)]
248+
)
249+
def test_between_time_incompatiable_args_given(self, include_start, include_end):
250+
# GH40245
251+
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
252+
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
253+
254+
stime = time(0, 0)
255+
etime = time(1, 0)
256+
msg = (
257+
"Deprecated arguments `include_start` and `include_end` cannot be "
258+
"passed if `inclusive` has been given."
259+
)
260+
with pytest.raises(ValueError, match=msg):
261+
ts.between_time(stime, etime, include_start, include_end, inclusive="left")
262+
263+
def test_between_time_same_functionality_old_and_new_args(self):
264+
# GH40245
265+
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
266+
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
267+
stime = time(0, 0)
268+
etime = time(1, 0)
269+
match = (
270+
"`include_start` and `include_end` "
271+
"are deprecated in favour of `inclusive`."
272+
)
273+
274+
result = ts.between_time(stime, etime)
275+
expected = ts.between_time(stime, etime, inclusive="both")
276+
tm.assert_frame_equal(result, expected)
277+
278+
with tm.assert_produces_warning(FutureWarning, match=match):
279+
result = ts.between_time(stime, etime, include_start=False)
280+
expected = ts.between_time(stime, etime, inclusive="right")
281+
tm.assert_frame_equal(result, expected)
282+
283+
with tm.assert_produces_warning(FutureWarning, match=match):
284+
result = ts.between_time(stime, etime, include_end=False)
285+
expected = ts.between_time(stime, etime, inclusive="left")
286+
tm.assert_frame_equal(result, expected)
287+
288+
with tm.assert_produces_warning(FutureWarning, match=match):
289+
result = ts.between_time(
290+
stime, etime, include_start=False, include_end=False
291+
)
292+
expected = ts.between_time(stime, etime, inclusive="neither")
293+
tm.assert_frame_equal(result, expected)
294+
295+
with tm.assert_produces_warning(FutureWarning, match=match):
296+
result = ts.between_time(stime, etime, include_start=True, include_end=True)
297+
expected = ts.between_time(stime, etime, inclusive="both")
298+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)