Skip to content

Commit 073b353

Browse files
author
Khor Chean Wei
authored
ENH: consistency of input args for boundaries - pd.interval_range (#46355)
1 parent 0141d92 commit 073b353

File tree

14 files changed

+106
-53
lines changed

14 files changed

+106
-53
lines changed

doc/source/user_guide/advanced.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -1082,14 +1082,14 @@ of :ref:`frequency aliases <timeseries.offset_aliases>` with datetime-like inter
10821082
10831083
pd.interval_range(start=pd.Timedelta("0 days"), periods=3, freq="9H")
10841084
1085-
Additionally, the ``closed`` parameter can be used to specify which side(s) the intervals
1086-
are closed on. Intervals are closed on the right side by default.
1085+
Additionally, the ``inclusive`` parameter can be used to specify which side(s) the intervals
1086+
are closed on. Intervals are closed on both sides by default.
10871087

10881088
.. ipython:: python
10891089
1090-
pd.interval_range(start=0, end=4, closed="both")
1090+
pd.interval_range(start=0, end=4, inclusive="both")
10911091
1092-
pd.interval_range(start=0, end=4, closed="neither")
1092+
pd.interval_range(start=0, end=4, inclusive="neither")
10931093
10941094
Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced
10951095
intervals from ``start`` to ``end`` inclusively, with ``periods`` number of elements

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ Other Deprecations
426426
- Deprecated behavior of method :meth:`DataFrame.quantile`; attribute ``numeric_only`` will default to ``False``, so datetime/timedelta columns will be included in the result (:issue:`7308`).
427427
- Deprecated :attr:`Timedelta.freq` and :attr:`Timedelta.is_populated` (:issue:`46430`)
428428
- Deprecated :attr:`Timedelta.delta` (:issue:`46476`)
429+
- Deprecated the ``closed`` argument in :meth:`interval_range` in favor of the ``inclusive`` argument; in a future version passing ``closed`` will raise (:issue:`40245`)
429430
-
430431

431432
.. ---------------------------------------------------------------------------

pandas/core/indexes/interval.py

+45-12
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
Hashable,
1212
Literal,
1313
)
14+
import warnings
1415

1516
import numpy as np
1617

@@ -160,7 +161,7 @@ def _new_IntervalIndex(cls, d):
160161
A new ``IntervalIndex`` is typically constructed using
161162
:func:`interval_range`:
162163
163-
>>> pd.interval_range(start=0, end=5)
164+
>>> pd.interval_range(start=0, end=5, inclusive="right")
164165
IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
165166
dtype='interval[int64, right]')
166167
@@ -443,7 +444,7 @@ def is_overlapping(self) -> bool:
443444
444445
Intervals that share closed endpoints overlap:
445446
446-
>>> index = pd.interval_range(0, 3, closed='both')
447+
>>> index = pd.interval_range(0, 3, inclusive='both')
447448
>>> index
448449
IntervalIndex([[0, 1], [1, 2], [2, 3]],
449450
dtype='interval[int64, both]')
@@ -452,7 +453,7 @@ def is_overlapping(self) -> bool:
452453
453454
Intervals that only have an open endpoint in common do not overlap:
454455
455-
>>> index = pd.interval_range(0, 3, closed='left')
456+
>>> index = pd.interval_range(0, 3, inclusive='left')
456457
>>> index
457458
IntervalIndex([[0, 1), [1, 2), [2, 3)],
458459
dtype='interval[int64, left]')
@@ -956,7 +957,8 @@ def interval_range(
956957
periods=None,
957958
freq=None,
958959
name: Hashable = None,
959-
closed: IntervalClosedType = "right",
960+
closed: lib.NoDefault = lib.no_default,
961+
inclusive: IntervalClosedType | None = None,
960962
) -> IntervalIndex:
961963
"""
962964
Return a fixed frequency IntervalIndex.
@@ -979,6 +981,14 @@ def interval_range(
979981
Whether the intervals are closed on the left-side, right-side, both
980982
or neither.
981983
984+
.. deprecated:: 1.5.0
985+
Argument `closed` has been deprecated to standardize boundary inputs.
986+
Use `inclusive` instead, to set each bound as closed or open.
987+
inclusive : {"both", "neither", "left", "right"}, default "both"
988+
Include boundaries; Whether to set each bound as closed or open.
989+
990+
.. versionadded:: 1.5.0
991+
982992
Returns
983993
-------
984994
IntervalIndex
@@ -1001,14 +1011,14 @@ def interval_range(
10011011
--------
10021012
Numeric ``start`` and ``end`` are supported.
10031013
1004-
>>> pd.interval_range(start=0, end=5)
1014+
>>> pd.interval_range(start=0, end=5, inclusive="right")
10051015
IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
10061016
dtype='interval[int64, right]')
10071017
10081018
Additionally, datetime-like input is also supported.
10091019
10101020
>>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
1011-
... end=pd.Timestamp('2017-01-04'))
1021+
... end=pd.Timestamp('2017-01-04'), inclusive="right")
10121022
IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
10131023
(2017-01-03, 2017-01-04]],
10141024
dtype='interval[datetime64[ns], right]')
@@ -1017,33 +1027,56 @@ def interval_range(
10171027
endpoints of the individual intervals within the ``IntervalIndex``. For
10181028
numeric ``start`` and ``end``, the frequency must also be numeric.
10191029
1020-
>>> pd.interval_range(start=0, periods=4, freq=1.5)
1030+
>>> pd.interval_range(start=0, periods=4, freq=1.5, inclusive="right")
10211031
IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
10221032
dtype='interval[float64, right]')
10231033
10241034
Similarly, for datetime-like ``start`` and ``end``, the frequency must be
10251035
convertible to a DateOffset.
10261036
10271037
>>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
1028-
... periods=3, freq='MS')
1038+
... periods=3, freq='MS', inclusive="right")
10291039
IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
10301040
(2017-03-01, 2017-04-01]],
10311041
dtype='interval[datetime64[ns], right]')
10321042
10331043
Specify ``start``, ``end``, and ``periods``; the frequency is generated
10341044
automatically (linearly spaced).
10351045
1036-
>>> pd.interval_range(start=0, end=6, periods=4)
1046+
>>> pd.interval_range(start=0, end=6, periods=4, inclusive="right")
10371047
IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
10381048
dtype='interval[float64, right]')
10391049
1040-
The ``closed`` parameter specifies which endpoints of the individual
1050+
The ``inclusive`` parameter specifies which endpoints of the individual
10411051
intervals within the ``IntervalIndex`` are closed.
10421052
1043-
>>> pd.interval_range(end=5, periods=4, closed='both')
1053+
>>> pd.interval_range(end=5, periods=4, inclusive='both')
10441054
IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
10451055
dtype='interval[int64, both]')
10461056
"""
1057+
if inclusive is not None and not isinstance(closed, lib.NoDefault):
1058+
raise ValueError(
1059+
"Deprecated argument `closed` cannot be passed "
1060+
"if argument `inclusive` is not None"
1061+
)
1062+
elif not isinstance(closed, lib.NoDefault):
1063+
warnings.warn(
1064+
"Argument `closed` is deprecated in favor of `inclusive`.",
1065+
FutureWarning,
1066+
stacklevel=2,
1067+
)
1068+
if closed is None:
1069+
inclusive = "both"
1070+
elif closed in ("both", "neither", "left", "right"):
1071+
inclusive = closed
1072+
else:
1073+
raise ValueError(
1074+
"Argument `closed` has to be either"
1075+
"'both', 'neither', 'left' or 'right'"
1076+
)
1077+
elif inclusive is None:
1078+
inclusive = "both"
1079+
10471080
start = maybe_box_datetimelike(start)
10481081
end = maybe_box_datetimelike(end)
10491082
endpoint = start if start is not None else end
@@ -1120,4 +1153,4 @@ def interval_range(
11201153
else:
11211154
breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)
11221155

1123-
return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
1156+
return IntervalIndex.from_breaks(breaks, name=name, closed=inclusive)

pandas/tests/frame/methods/test_round.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def test_round_nonunique_categorical(self):
210210

211211
def test_round_interval_category_columns(self):
212212
# GH#30063
213-
columns = pd.CategoricalIndex(pd.interval_range(0, 2))
213+
columns = pd.CategoricalIndex(pd.interval_range(0, 2, inclusive="right"))
214214
df = DataFrame([[0.66, 1.1], [0.3, 0.25]], columns=columns)
215215

216216
result = df.round()

pandas/tests/groupby/aggregate/test_cython.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ def test_cython_agg_empty_buckets_nanops(observed):
214214
result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general(
215215
"add", alt=None, numeric_only=True
216216
)
217-
intervals = pd.interval_range(0, 20, freq=5)
217+
intervals = pd.interval_range(0, 20, freq=5, inclusive="right")
218218
expected = DataFrame(
219219
{"a": [0, 0, 36, 0]},
220220
index=pd.CategoricalIndex(intervals, name="a", ordered=True),

pandas/tests/indexes/interval/test_astype.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def test_subtype_integer(self, subtype_start, subtype_end):
117117
@pytest.mark.xfail(reason="GH#15832")
118118
def test_subtype_integer_errors(self):
119119
# int64 -> uint64 fails with negative values
120-
index = interval_range(-10, 10)
120+
index = interval_range(-10, 10, inclusive="right")
121121
dtype = IntervalDtype("uint64", "right")
122122

123123
# Until we decide what the exception message _should_ be, we
@@ -133,7 +133,7 @@ class TestFloatSubtype(AstypeTests):
133133
"""Tests specific to IntervalIndex with float subtype"""
134134

135135
indexes = [
136-
interval_range(-10.0, 10.0, closed="neither"),
136+
interval_range(-10.0, 10.0, inclusive="neither"),
137137
IntervalIndex.from_arrays(
138138
[-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
139139
),
@@ -170,7 +170,7 @@ def test_subtype_integer_with_non_integer_borders(self, subtype):
170170

171171
def test_subtype_integer_errors(self):
172172
# float64 -> uint64 fails with negative values
173-
index = interval_range(-10.0, 10.0)
173+
index = interval_range(-10.0, 10.0, inclusive="right")
174174
dtype = IntervalDtype("uint64", "right")
175175
msg = re.escape(
176176
"Cannot convert interval[float64, right] to interval[uint64, right]; "
@@ -191,10 +191,10 @@ class TestDatetimelikeSubtype(AstypeTests):
191191
"""Tests specific to IntervalIndex with datetime-like subtype"""
192192

193193
indexes = [
194-
interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
194+
interval_range(Timestamp("2018-01-01"), periods=10, inclusive="neither"),
195195
interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
196196
interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
197-
interval_range(Timedelta("0 days"), periods=10, closed="both"),
197+
interval_range(Timedelta("0 days"), periods=10, inclusive="both"),
198198
interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
199199
]
200200

pandas/tests/indexes/interval/test_interval.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,10 @@ def test_delete(self, closed):
167167
@pytest.mark.parametrize(
168168
"data",
169169
[
170-
interval_range(0, periods=10, closed="neither"),
171-
interval_range(1.7, periods=8, freq=2.5, closed="both"),
172-
interval_range(Timestamp("20170101"), periods=12, closed="left"),
173-
interval_range(Timedelta("1 day"), periods=6, closed="right"),
170+
interval_range(0, periods=10, inclusive="neither"),
171+
interval_range(1.7, periods=8, freq=2.5, inclusive="both"),
172+
interval_range(Timestamp("20170101"), periods=12, inclusive="left"),
173+
interval_range(Timedelta("1 day"), periods=6, inclusive="right"),
174174
],
175175
)
176176
def test_insert(self, data):
@@ -868,9 +868,9 @@ def test_nbytes(self):
868868
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
869869
def test_set_closed(self, name, closed, new_closed):
870870
# GH 21670
871-
index = interval_range(0, 5, closed=closed, name=name)
871+
index = interval_range(0, 5, inclusive=closed, name=name)
872872
result = index.set_closed(new_closed)
873-
expected = interval_range(0, 5, closed=new_closed, name=name)
873+
expected = interval_range(0, 5, inclusive=new_closed, name=name)
874874
tm.assert_index_equal(result, expected)
875875

876876
@pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])

pandas/tests/indexes/interval/test_interval_range.py

+30-14
Original file line numberDiff line numberDiff line change
@@ -34,25 +34,25 @@ def test_constructor_numeric(self, closed, name, freq, periods):
3434

3535
# defined from start/end/freq
3636
result = interval_range(
37-
start=start, end=end, freq=freq, name=name, closed=closed
37+
start=start, end=end, freq=freq, name=name, inclusive=closed
3838
)
3939
tm.assert_index_equal(result, expected)
4040

4141
# defined from start/periods/freq
4242
result = interval_range(
43-
start=start, periods=periods, freq=freq, name=name, closed=closed
43+
start=start, periods=periods, freq=freq, name=name, inclusive=closed
4444
)
4545
tm.assert_index_equal(result, expected)
4646

4747
# defined from end/periods/freq
4848
result = interval_range(
49-
end=end, periods=periods, freq=freq, name=name, closed=closed
49+
end=end, periods=periods, freq=freq, name=name, inclusive=closed
5050
)
5151
tm.assert_index_equal(result, expected)
5252

5353
# GH 20976: linspace behavior defined from start/end/periods
5454
result = interval_range(
55-
start=start, end=end, periods=periods, name=name, closed=closed
55+
start=start, end=end, periods=periods, name=name, inclusive=closed
5656
)
5757
tm.assert_index_equal(result, expected)
5858

@@ -67,19 +67,19 @@ def test_constructor_timestamp(self, closed, name, freq, periods, tz):
6767

6868
# defined from start/end/freq
6969
result = interval_range(
70-
start=start, end=end, freq=freq, name=name, closed=closed
70+
start=start, end=end, freq=freq, name=name, inclusive=closed
7171
)
7272
tm.assert_index_equal(result, expected)
7373

7474
# defined from start/periods/freq
7575
result = interval_range(
76-
start=start, periods=periods, freq=freq, name=name, closed=closed
76+
start=start, periods=periods, freq=freq, name=name, inclusive=closed
7777
)
7878
tm.assert_index_equal(result, expected)
7979

8080
# defined from end/periods/freq
8181
result = interval_range(
82-
end=end, periods=periods, freq=freq, name=name, closed=closed
82+
end=end, periods=periods, freq=freq, name=name, inclusive=closed
8383
)
8484
tm.assert_index_equal(result, expected)
8585

@@ -88,7 +88,7 @@ def test_constructor_timestamp(self, closed, name, freq, periods, tz):
8888
# matches expected only for non-anchored offsets and tz naive
8989
# (anchored/DST transitions cause unequal spacing in expected)
9090
result = interval_range(
91-
start=start, end=end, periods=periods, name=name, closed=closed
91+
start=start, end=end, periods=periods, name=name, inclusive=closed
9292
)
9393
tm.assert_index_equal(result, expected)
9494

@@ -102,25 +102,25 @@ def test_constructor_timedelta(self, closed, name, freq, periods):
102102

103103
# defined from start/end/freq
104104
result = interval_range(
105-
start=start, end=end, freq=freq, name=name, closed=closed
105+
start=start, end=end, freq=freq, name=name, inclusive=closed
106106
)
107107
tm.assert_index_equal(result, expected)
108108

109109
# defined from start/periods/freq
110110
result = interval_range(
111-
start=start, periods=periods, freq=freq, name=name, closed=closed
111+
start=start, periods=periods, freq=freq, name=name, inclusive=closed
112112
)
113113
tm.assert_index_equal(result, expected)
114114

115115
# defined from end/periods/freq
116116
result = interval_range(
117-
end=end, periods=periods, freq=freq, name=name, closed=closed
117+
end=end, periods=periods, freq=freq, name=name, inclusive=closed
118118
)
119119
tm.assert_index_equal(result, expected)
120120

121121
# GH 20976: linspace behavior defined from start/end/periods
122122
result = interval_range(
123-
start=start, end=end, periods=periods, name=name, closed=closed
123+
start=start, end=end, periods=periods, name=name, inclusive=closed
124124
)
125125
tm.assert_index_equal(result, expected)
126126

@@ -163,7 +163,9 @@ def test_no_invalid_float_truncation(self, start, end, freq):
163163
breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
164164
expected = IntervalIndex.from_breaks(breaks)
165165

166-
result = interval_range(start=start, end=end, periods=4, freq=freq)
166+
result = interval_range(
167+
start=start, end=end, periods=4, freq=freq, inclusive="right"
168+
)
167169
tm.assert_index_equal(result, expected)
168170

169171
@pytest.mark.parametrize(
@@ -184,7 +186,7 @@ def test_no_invalid_float_truncation(self, start, end, freq):
184186
def test_linspace_dst_transition(self, start, mid, end):
185187
# GH 20976: linspace behavior defined from start/end/periods
186188
# accounts for the hour gained/lost during DST transition
187-
result = interval_range(start=start, end=end, periods=2)
189+
result = interval_range(start=start, end=end, periods=2, inclusive="right")
188190
expected = IntervalIndex.from_breaks([start, mid, end])
189191
tm.assert_index_equal(result, expected)
190192

@@ -353,3 +355,17 @@ def test_errors(self):
353355
msg = "Start and end cannot both be tz-aware with different timezones"
354356
with pytest.raises(TypeError, match=msg):
355357
interval_range(start=start, end=end)
358+
359+
def test_interval_range_error_and_warning(self):
360+
# GH 40245
361+
362+
msg = (
363+
"Deprecated argument `closed` cannot "
364+
"be passed if argument `inclusive` is not None"
365+
)
366+
with pytest.raises(ValueError, match=msg):
367+
interval_range(end=5, periods=4, closed="both", inclusive="both")
368+
369+
msg = "Argument `closed` is deprecated in favor of `inclusive`"
370+
with tm.assert_produces_warning(FutureWarning, match=msg):
371+
interval_range(end=5, periods=4, closed="right")

pandas/tests/indexes/interval/test_setops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ def test_set_incompatible_types(self, closed, op_name, sort):
194194
tm.assert_index_equal(result, expected)
195195

196196
# GH 19016: incompatible dtypes -> cast to object
197-
other = interval_range(Timestamp("20180101"), periods=9, closed=closed)
197+
other = interval_range(Timestamp("20180101"), periods=9, inclusive=closed)
198198
expected = getattr(index.astype(object), op_name)(other, sort=sort)
199199
if op_name == "difference":
200200
expected = index

0 commit comments

Comments
 (0)