Skip to content

ENH: consistency of input args for boundaries (pd.date_range) #43504

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Oct 3, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ Other Deprecations
- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)
- Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
-
- Deprecated the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)

.. ---------------------------------------------------------------------------

Expand Down Expand Up @@ -340,6 +341,7 @@ Datetimelike
^^^^^^^^^^^^
- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`)
- Bug in :meth:`date_range` and :meth:`bdate_range` do not return right bound when ``start`` = ``end`` and set is closed on one side (:issue:`43394`)
-

Timedelta
Expand Down
24 changes: 17 additions & 7 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
)
from pandas._typing import npt
from pandas.errors import PerformanceWarning
from pandas.util._validators import validate_endpoints
from pandas.util._validators import validate_inclusive

from pandas.core.dtypes.cast import astype_dt64_to_dt64tz
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -394,7 +394,7 @@ def _generate_range(
normalize=False,
ambiguous="raise",
nonexistent="raise",
closed=None,
inclusive="both",
):

periods = dtl.validate_periods(periods)
Expand All @@ -417,7 +417,7 @@ def _generate_range(
if start is NaT or end is NaT:
raise ValueError("Neither `start` nor `end` can be NaT")

left_closed, right_closed = validate_endpoints(closed)
left_inclusive, right_inclusive = validate_inclusive(inclusive)
start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize)
tz = _infer_tz_from_endpoints(start, end, tz)

Expand Down Expand Up @@ -477,10 +477,20 @@ def _generate_range(
arr = arr.astype("M8[ns]", copy=False)
index = cls._simple_new(arr, freq=None, dtype=dtype)

if not left_closed and len(index) and index[0] == start:
index = index[1:]
if not right_closed and len(index) and index[-1] == end:
index = index[:-1]
# do not remove when one side is inclusive
# and removing would leave index empty
to_remove_any = not (
(left_inclusive or right_inclusive)
and len(index) == 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be len(index) ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mm yeah, changed that and changed the tests in test_date_range

and start == index[0]
and start == end
)

if to_remove_any:
if (not left_inclusive) and len(index) and index[0] == start:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are reusing the same logic tests deriving to_remove_any. I haven't come up with a better way but instinctively this feels a complicated logic path. Have you tried refactoring to a simpler setup?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I refactored the logic a little and I am not sure if it is any better though

index = index[1:]
if (not right_inclusive) and len(index) and index[-1] == end:
index = index[:-1]

dtype = tz_to_dtype(tz)
return cls._simple_new(index._ndarray, freq=freq, dtype=dtype)
Expand Down
43 changes: 40 additions & 3 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,8 @@ def date_range(
tz=None,
normalize: bool = False,
name: Hashable = None,
closed=None,
closed: str | None | lib.NoDefault = lib.no_default,
inclusive: str | None = None,
**kwargs,
) -> DatetimeIndex:
"""
Expand Down Expand Up @@ -919,6 +920,12 @@ def date_range(
closed : {None, 'left', 'right'}, optional
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None, the default).

.. deprecated:: 1.4.0
Argument `closed` have been deprecated to standardize boundary inputs.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'has' not 'have'.

Use `inclusive` instead, to set each bound as closed or open.
inclusive : {"both", "neither", "left", "right"}, default "both"
Include boundaries; Whether to set each bound as closed or open.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

versionadded 1.4.0

**kwargs
For compatibility. Has no effect on the result.

Expand Down Expand Up @@ -1029,6 +1036,28 @@ def date_range(
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
dtype='datetime64[ns]', freq='D')
"""
if inclusive is not None and closed is not lib.no_default:
raise ValueError(
"Deprecated argument `closed` cannot be passed"
"if argument `inclusive` is not None"
)
elif closed is not lib.no_default:
warnings.warn(
"Argument `closed` is deprecated in favor of `inclusive`.",
FutureWarning,
stacklevel=2,
)
if closed is None:
inclusive = "both"
elif closed in ("left", "right"):
inclusive = closed
else:
raise ValueError(
"Argument `closed` has to be either 'left', 'right' or None"
)
elif inclusive is None:
inclusive = "both"

if freq is None and com.any_none(periods, start, end):
freq = "D"

Expand All @@ -1039,7 +1068,7 @@ def date_range(
freq=freq,
tz=tz,
normalize=normalize,
closed=closed,
inclusive=inclusive,
**kwargs,
)
return DatetimeIndex._simple_new(dtarr, name=name)
Expand All @@ -1055,7 +1084,8 @@ def bdate_range(
name: Hashable = None,
weekmask=None,
holidays=None,
closed=None,
closed: lib.NoDefault = lib.no_default,
inclusive: str | None = None,
**kwargs,
) -> DatetimeIndex:
"""
Expand Down Expand Up @@ -1090,6 +1120,12 @@ def bdate_range(
closed : str, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None).

.. deprecated:: 1.4.0
Argument `closed` have been deprecated to standardize boundary inputs.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'has'

Use `inclusive` instead, to set each bound as closed or open.
inclusive : {"both", "neither", "left", "right"}, default "both"
Include boundaries; Whether to set each bound as closed or open.
**kwargs
For compatibility. Has no effect on the result.

Expand Down Expand Up @@ -1143,6 +1179,7 @@ def bdate_range(
normalize=normalize,
name=name,
closed=closed,
inclusive=inclusive,
**kwargs,
)

Expand Down
110 changes: 76 additions & 34 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,85 +548,116 @@ def test_range_closed(self, freq):
begin = datetime(2011, 1, 1)
end = datetime(2014, 1, 1)

closed = date_range(begin, end, closed=None, freq=freq)
left = date_range(begin, end, closed="left", freq=freq)
right = date_range(begin, end, closed="right", freq=freq)
both = date_range(begin, end, inclusive="both", freq=freq)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you parameterize this test instead, using the inclusive_endpoints_fixture fixtures (its in pandas/tests/frameso likely need to move topandas/conftest.py`

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea makes sense, will do that

left = date_range(begin, end, inclusive="left", freq=freq)
right = date_range(begin, end, inclusive="right", freq=freq)
neither = date_range(begin, end, inclusive="neither", freq=freq)

expected_left = left
expected_right = right
expected_neither = neither

if end == closed[-1]:
expected_left = closed[:-1]
if begin == closed[0]:
expected_right = closed[1:]
if end == both[-1]:
expected_left = both[:-1]
if begin == both[0]:
expected_right = both[1:]
if end == both[-1] and begin == both[0]:
expected_neither = both[1:-1]

tm.assert_index_equal(expected_left, left)
tm.assert_index_equal(expected_right, right)
tm.assert_index_equal(expected_neither, neither)

def test_range_closed_with_tz_aware_start_end(self):
# GH12409, GH12684
begin = Timestamp("2011/1/1", tz="US/Eastern")
end = Timestamp("2014/1/1", tz="US/Eastern")

for freq in ["1D", "3D", "2M", "7W", "3H", "A"]:
closed = date_range(begin, end, closed=None, freq=freq)
left = date_range(begin, end, closed="left", freq=freq)
right = date_range(begin, end, closed="right", freq=freq)
both = date_range(begin, end, inclusive="both", freq=freq)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

left = date_range(begin, end, inclusive="left", freq=freq)
right = date_range(begin, end, inclusive="right", freq=freq)
neither = date_range(begin, end, inclusive="neither", freq=freq)

expected_left = left
expected_right = right
expected_neither = neither

if end == closed[-1]:
expected_left = closed[:-1]
if begin == closed[0]:
expected_right = closed[1:]
if end == both[-1]:
expected_left = both[:-1]
if begin == both[0]:
expected_right = both[1:]
if end == both[-1] and begin == both[0]:
expected_neither = both[1:-1]

tm.assert_index_equal(expected_left, left)
tm.assert_index_equal(expected_right, right)
tm.assert_index_equal(expected_neither, neither)

begin = Timestamp("2011/1/1")
end = Timestamp("2014/1/1")
begintz = Timestamp("2011/1/1", tz="US/Eastern")
endtz = Timestamp("2014/1/1", tz="US/Eastern")

for freq in ["1D", "3D", "2M", "7W", "3H", "A"]:
closed = date_range(begin, end, closed=None, freq=freq, tz="US/Eastern")
left = date_range(begin, end, closed="left", freq=freq, tz="US/Eastern")
right = date_range(begin, end, closed="right", freq=freq, tz="US/Eastern")
both = date_range(begin, end, inclusive="both", freq=freq, tz="US/Eastern")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

left = date_range(begin, end, inclusive="left", freq=freq, tz="US/Eastern")
right = date_range(
begin, end, inclusive="right", freq=freq, tz="US/Eastern"
)
neither = date_range(
begin, end, inclusive="neither", freq=freq, tz="US/Eastern"
)

expected_left = left
expected_right = right
expected_neither = neither

if endtz == closed[-1]:
expected_left = closed[:-1]
if begintz == closed[0]:
expected_right = closed[1:]
if endtz == both[-1]:
expected_left = both[:-1]
if begintz == both[0]:
expected_right = both[1:]
if begintz == both[0] and endtz == both[-1]:
expected_neither = both[1:-1]

tm.assert_index_equal(expected_left, left)
tm.assert_index_equal(expected_right, right)
tm.assert_index_equal(expected_neither, neither)

@pytest.mark.parametrize("closed", ["right", "left", None])
def test_range_closed_boundary(self, closed):
@pytest.mark.parametrize("inclusive", ["right", "left", "both", "neither"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

def test_range_closed_boundary(self, inclusive):
# GH#11804
right_boundary = date_range(
"2015-09-12", "2015-12-01", freq="QS-MAR", closed=closed
"2015-09-12", "2015-12-01", freq="QS-MAR", inclusive=inclusive
)
left_boundary = date_range(
"2015-09-01", "2015-09-12", freq="QS-MAR", closed=closed
"2015-09-01", "2015-09-12", freq="QS-MAR", inclusive=inclusive
)
both_boundary = date_range(
"2015-09-01", "2015-12-01", freq="QS-MAR", closed=closed
"2015-09-01", "2015-12-01", freq="QS-MAR", inclusive=inclusive
)
expected_right = expected_left = expected_both = both_boundary
neither_boundary = date_range(
"2015-09-11", "2015-09-12", freq="QS-MAR", inclusive=inclusive
)

expected_right = both_boundary
expected_left = both_boundary
expected_both = both_boundary

if closed == "right":
if inclusive == "right":
expected_left = both_boundary[1:]
if closed == "left":
elif inclusive == "left":
expected_right = both_boundary[:-1]
if closed is None:
elif inclusive == "both":
expected_right = both_boundary[1:]
expected_left = both_boundary[:-1]

expected_neither = both_boundary[1:-1]

tm.assert_index_equal(right_boundary, expected_right)
tm.assert_index_equal(left_boundary, expected_left)
tm.assert_index_equal(both_boundary, expected_both)
tm.assert_index_equal(neither_boundary, expected_neither)

def test_years_only(self):
# GH 6961
Expand Down Expand Up @@ -679,6 +710,17 @@ def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture):
]
tm.assert_index_equal(result, expected)

def test_range_where_start_equal_end(self):
# GH 43394
start = "2021-09-02"
end = "2021-09-02"
right_result = date_range(start=start, end=end, freq="D", inclusive="right")
left_result = date_range(start=start, end=end, freq="D", inclusive="left")
expected = date_range(start=start, end=end, freq="D", inclusive="both")

tm.assert_index_equal(right_result, expected)
tm.assert_index_equal(left_result, expected)


class TestDateRangeTZ:
"""Tests for date_range with timezones"""
Expand Down Expand Up @@ -867,12 +909,12 @@ def test_daterange_bug_456(self):
result = rng1.union(rng2)
assert isinstance(result, DatetimeIndex)

@pytest.mark.parametrize("closed", ["left", "right"])
def test_bdays_and_open_boundaries(self, closed):
@pytest.mark.parametrize("inclusive", ["left", "right", "neither", "both"])
def test_bdays_and_open_boundaries(self, inclusive):
# GH 6673
start = "2018-07-21" # Saturday
end = "2018-07-29" # Sunday
result = date_range(start, end, freq="B", closed=closed)
result = date_range(start, end, freq="B", inclusive=inclusive)

bday_start = "2018-07-23" # Monday
bday_end = "2018-07-27" # Friday
Expand Down Expand Up @@ -1018,7 +1060,7 @@ def test_all_custom_freq(self, freq):
def test_range_with_millisecond_resolution(self, start_end):
# https://github.com/pandas-dev/pandas/issues/24110
start, end = start_end
result = date_range(start=start, end=end, periods=2, closed="left")
result = date_range(start=start, end=end, periods=2, inclusive="left")
expected = DatetimeIndex([start])
tm.assert_index_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1796,7 +1796,7 @@ def test_loc_setitem_with_expansion_and_existing_dst(self):
start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid")
end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid")
ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid")
idx = date_range(start, end, closed="left", freq="H")
idx = date_range(start, end, inclusive="left", freq="H")
assert ts not in idx # i.e. result.loc setitem is with-expansion

result = DataFrame(index=idx, columns=["value"])
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_period_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_asfreq(self, series_and_frame, freq, kind):
else:
start = obj.index[0].to_timestamp(how="start")
end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start")
new_index = date_range(start=start, end=end, freq=freq, closed="left")
new_index = date_range(start=start, end=end, freq=freq, inclusive="left")
expected = obj.to_timestamp().reindex(new_index).to_period(freq)
result = obj.resample(freq, kind=kind).asfreq()
tm.assert_almost_equal(result, expected)
Expand Down