Skip to content

BUG: date_range inclusive parameter behavior doesn't match interval notation #55319

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Fixed regressions
Bug fixes
~~~~~~~~~
- Bug in :meth:`DatetimeIndex.diff` raising ``TypeError`` (:issue:`55080`)
- Fixed bug in :meth:`DatetimeArray._generate_range` where ``inclusive`` argument behavior did not match interval notation (:issue:`55293`,:issue:`46331`)
- Bug in :meth:`Index.isin` raising for Arrow backed string and ``None`` value (:issue:`55821`)
- Fix :func:`read_parquet` and :func:`read_feather` for `CVE-2023-47248 <https://www.cve.org/CVERecord?id=CVE-2023-47248>`__ (:issue:`55894`)

Expand Down
16 changes: 5 additions & 11 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,17 +508,11 @@ def _generate_range( # type: ignore[override]
# to overflow and cast to e.g. f8, but if it does we need to cast
i8values = i8values.astype("i8")

if start == end:
if not left_inclusive and not right_inclusive:
i8values = i8values[1:-1]
else:
start_i8 = Timestamp(start)._value
end_i8 = Timestamp(end)._value
if not left_inclusive or not right_inclusive:
if not left_inclusive and len(i8values) and i8values[0] == start_i8:
i8values = i8values[1:]
if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
i8values = i8values[:-1]
if not left_inclusive:
i8values = i8values[1:]

if not right_inclusive:
i8values = i8values[:-1]

dt64_values = i8values.view(f"datetime64[{unit}]")
dtype = tz_to_dtype(tz, unit=unit)
Expand Down
68 changes: 32 additions & 36 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,29 +49,22 @@


def _get_expected_range(
begin_to_match,
end_to_match,
both_range,
inclusive_endpoints,
):
"""Helper to get expected range from a both inclusive range"""
left_match = begin_to_match == both_range[0]
right_match = end_to_match == both_range[-1]

if inclusive_endpoints == "left" and right_match:
expected_range = both_range[:-1]
elif inclusive_endpoints == "right" and left_match:
expected_range = both_range[1:]
elif inclusive_endpoints == "neither" and left_match and right_match:
expected_range = both_range[1:-1]
elif inclusive_endpoints == "neither" and right_match:
if inclusive_endpoints == "left":
expected_range = both_range[:-1]
elif inclusive_endpoints == "neither" and left_match:
elif inclusive_endpoints == "right":
expected_range = both_range[1:]
elif inclusive_endpoints == "both":
expected_range = both_range[:]
elif inclusive_endpoints == "neither":
expected_range = both_range[1:-1]
else:
expected_range = both_range[:]
raise ValueError(
"Inclusive has to be either 'both', 'neither', 'left' or 'right'"
)

return expected_range

Expand Down Expand Up @@ -591,12 +584,29 @@ def test_range_closed(self, freq, tz, inclusive_endpoints_fixture):
begin = Timestamp("2011/1/1", tz=tz)
end = Timestamp("2014/1/1", tz=tz)

result_range = date_range(
begin, end, inclusive=inclusive_endpoints_fixture, freq=freq
)
both_range = date_range(begin, end, inclusive="both", freq=freq)
expected_range = _get_expected_range(both_range, inclusive_endpoints_fixture)

tm.assert_index_equal(expected_range, result_range)

@pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3h", "YE"])
def test_range_closed_with_tz_aware_start_end(
self, freq, inclusive_endpoints_fixture
):
# GH12409, GH12684
begin = Timestamp("2011/1/1", tz="US/Eastern")
end = Timestamp("2014/1/1", tz="US/Eastern")

result_range = date_range(
begin, end, inclusive=inclusive_endpoints_fixture, freq=freq
)
both_range = date_range(begin, end, inclusive="both", freq=freq)
expected_range = _get_expected_range(
begin, end, both_range, inclusive_endpoints_fixture
both_range,
inclusive_endpoints_fixture,
)

tm.assert_index_equal(expected_range, result_range)
Expand All @@ -607,8 +617,8 @@ def test_range_with_tz_closed_with_tz_aware_start_end(
):
begin = Timestamp("2011/1/1")
end = Timestamp("2014/1/1")
begintz = Timestamp("2011/1/1", tz="US/Eastern")
endtz = Timestamp("2014/1/1", tz="US/Eastern")
# begintz = Timestamp("2011/1/1", tz="US/Eastern")
# endtz = Timestamp("2014/1/1", tz="US/Eastern")
Comment on lines -610 to +621
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this part of the test comment out?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The shortest answer would be that those lines are not needed and can be safely deleted. I just commented them out when I was testing and then forgot to remove them.

And as for why they were there in the first place. They were used by _get_expected_range, but they are not needed for the function to work

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok thanks - if they're not needed, then to simplify reviews, could you please first open a precursor PR in which you remove anything that's not needed, and then we keep this PR focused on the logic changes (+ relevant tests)? thanks 🙏

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid that might not be possible. The code is complicated because of its logic errors, thus I can't simplify it without making logic changes.
For example:
in test_range_closed_boundary

expected_right = both_boundary[1:]
expected_left = both_boundary[:-1]
expected_both = both_boundary[:]
expected_neither = both_boundary[1:-1]

this simplification doesn't work if there are no logic changes in the _generate_range function, because then the expected values are compared to incorrectly generated ranges.


result_range = date_range(
begin,
Expand All @@ -621,8 +631,6 @@ def test_range_with_tz_closed_with_tz_aware_start_end(
begin, end, inclusive="both", freq=freq, tz="US/Eastern"
)
expected_range = _get_expected_range(
begintz,
endtz,
both_range,
inclusive_endpoints_fixture,
)
Expand Down Expand Up @@ -656,18 +664,9 @@ def test_range_closed_boundary(self, inclusive_endpoints_fixture):
inclusive=inclusive_endpoints_fixture,
)

expected_right = both_boundary
expected_left = both_boundary
expected_both = both_boundary

if inclusive_endpoints_fixture == "right":
expected_left = both_boundary[1:]
elif inclusive_endpoints_fixture == "left":
expected_right = both_boundary[:-1]
elif inclusive_endpoints_fixture == "both":
expected_right = both_boundary[1:]
expected_left = both_boundary[:-1]

expected_right = both_boundary[1:]
expected_left = both_boundary[:-1]
expected_both = both_boundary[:]
expected_neither = both_boundary[1:-1]

tm.assert_index_equal(right_boundary, expected_right)
Expand Down Expand Up @@ -748,10 +747,7 @@ def test_range_where_start_equal_end(self, inclusive_endpoints_fixture):
)

both_range = date_range(start=start, end=end, freq="D", inclusive="both")
if inclusive_endpoints_fixture == "neither":
expected = both_range[1:-1]
elif inclusive_endpoints_fixture in ("left", "right", "both"):
expected = both_range[:]
expected = _get_expected_range(both_range, inclusive_endpoints_fixture)

tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -1102,7 +1098,7 @@ def test_bdays_and_open_boundaries(self, inclusive):

bday_start = "2018-07-23" # Monday
bday_end = "2018-07-27" # Friday
expected = date_range(bday_start, bday_end, freq="D")
expected = date_range(bday_start, bday_end, freq="D", inclusive=inclusive)
tm.assert_index_equal(result, expected)
# Note: we do _not_ expect the freqs to match here

Expand Down