Skip to content

Commit 81a44fa

Browse files
BUG: DatetimeIndex.is_year_start unexpected behavior when constructed with freq 'MS' date_range (#57377) (#57494)
* Added some comments to where the bug is occurring * Potential fix, passed all potentially relevant tests * Very likely fix * Reverted to previous start/end scheme; added tests * Added fixes to whatsnew doc * Removed stray comment * Fixed alphabetical problem in whatsnew * add parametric test * fixup --------- Co-authored-by: Marco Gorelli <[email protected]>
1 parent f2f298b commit 81a44fa

File tree

4 files changed

+99
-2
lines changed

4 files changed

+99
-2
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@ Categorical
481481

482482
Datetimelike
483483
^^^^^^^^^^^^
484+
- Bug in :attr:`DatetimeIndex.is_year_start` where a :class:`DatetimeIndex` constructed via :func:`date_range` with frequency ``'MS'`` wouldn't have the correct year or quarter start attributes (:issue:`57377`)
484485
- Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`)
485486
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
486487
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)

pandas/_libs/tslibs/fields.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -253,9 +253,10 @@ def get_start_end_field(
253253
# month of year. Other offsets use month, startingMonth as ending
254254
# month of year.
255255

256-
if freq_name.lstrip("B")[0:2] in ["MS", "QS", "YS"]:
256+
if freq_name.lstrip("B")[0:2] in ["QS", "YS"]:
257257
end_month = 12 if month_kw == 1 else month_kw - 1
258258
start_month = month_kw
259+
259260
else:
260261
end_month = month_kw
261262
start_month = (end_month % 12) + 1

pandas/core/arrays/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def f(self):
143143
month_kw = 12
144144
if freq:
145145
kwds = freq.kwds
146-
month_kw = kwds.get("startingMonth", kwds.get("month", 12))
146+
month_kw = kwds.get("startingMonth", kwds.get("month", month_kw))
147147

148148
if freq is not None:
149149
freq_name = freq.name

pandas/tests/indexes/datetimes/test_scalar_compat.py

+95
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import locale
1212
import unicodedata
1313

14+
from hypothesis import given
15+
import hypothesis.strategies as st
1416
import numpy as np
1517
import pytest
1618

@@ -329,6 +331,84 @@ def test_dti_is_month_start_custom(self):
329331
with pytest.raises(ValueError, match=msg):
330332
dti.is_month_start
331333

334+
@pytest.mark.parametrize(
335+
"timestamp, freq, periods, expected_values",
336+
[
337+
("2017-12-01", "MS", 3, np.array([False, True, False])),
338+
("2017-12-01", "QS", 3, np.array([True, False, False])),
339+
("2017-12-01", "YS", 3, np.array([True, True, True])),
340+
],
341+
)
342+
def test_dti_dr_is_year_start(self, timestamp, freq, periods, expected_values):
343+
# GH57377
344+
result = date_range(timestamp, freq=freq, periods=periods).is_year_start
345+
tm.assert_numpy_array_equal(result, expected_values)
346+
347+
@pytest.mark.parametrize(
348+
"timestamp, freq, periods, expected_values",
349+
[
350+
("2017-12-01", "ME", 3, np.array([True, False, False])),
351+
("2017-12-01", "QE", 3, np.array([True, False, False])),
352+
("2017-12-01", "YE", 3, np.array([True, True, True])),
353+
],
354+
)
355+
def test_dti_dr_is_year_end(self, timestamp, freq, periods, expected_values):
356+
# GH57377
357+
result = date_range(timestamp, freq=freq, periods=periods).is_year_end
358+
tm.assert_numpy_array_equal(result, expected_values)
359+
360+
@pytest.mark.parametrize(
361+
"timestamp, freq, periods, expected_values",
362+
[
363+
("2017-12-01", "MS", 3, np.array([False, True, False])),
364+
("2017-12-01", "QS", 3, np.array([True, True, True])),
365+
("2017-12-01", "YS", 3, np.array([True, True, True])),
366+
],
367+
)
368+
def test_dti_dr_is_quarter_start(self, timestamp, freq, periods, expected_values):
369+
# GH57377
370+
result = date_range(timestamp, freq=freq, periods=periods).is_quarter_start
371+
tm.assert_numpy_array_equal(result, expected_values)
372+
373+
@pytest.mark.parametrize(
374+
"timestamp, freq, periods, expected_values",
375+
[
376+
("2017-12-01", "ME", 3, np.array([True, False, False])),
377+
("2017-12-01", "QE", 3, np.array([True, True, True])),
378+
("2017-12-01", "YE", 3, np.array([True, True, True])),
379+
],
380+
)
381+
def test_dti_dr_is_quarter_end(self, timestamp, freq, periods, expected_values):
382+
# GH57377
383+
result = date_range(timestamp, freq=freq, periods=periods).is_quarter_end
384+
tm.assert_numpy_array_equal(result, expected_values)
385+
386+
@pytest.mark.parametrize(
387+
"timestamp, freq, periods, expected_values",
388+
[
389+
("2017-12-01", "MS", 3, np.array([True, True, True])),
390+
("2017-12-01", "QS", 3, np.array([True, True, True])),
391+
("2017-12-01", "YS", 3, np.array([True, True, True])),
392+
],
393+
)
394+
def test_dti_dr_is_month_start(self, timestamp, freq, periods, expected_values):
395+
# GH57377
396+
result = date_range(timestamp, freq=freq, periods=periods).is_month_start
397+
tm.assert_numpy_array_equal(result, expected_values)
398+
399+
@pytest.mark.parametrize(
400+
"timestamp, freq, periods, expected_values",
401+
[
402+
("2017-12-01", "ME", 3, np.array([True, True, True])),
403+
("2017-12-01", "QE", 3, np.array([True, True, True])),
404+
("2017-12-01", "YE", 3, np.array([True, True, True])),
405+
],
406+
)
407+
def test_dti_dr_is_month_end(self, timestamp, freq, periods, expected_values):
408+
# GH57377
409+
result = date_range(timestamp, freq=freq, periods=periods).is_month_end
410+
tm.assert_numpy_array_equal(result, expected_values)
411+
332412
def test_dti_is_year_quarter_start_doubledigit_freq(self):
333413
# GH#58523
334414
dr = date_range("2017-01-01", periods=2, freq="10YS")
@@ -343,3 +423,18 @@ def test_dti_is_year_start_freq_custom_business_day_with_digit(self):
343423
msg = "Custom business days is not supported by is_year_start"
344424
with pytest.raises(ValueError, match=msg):
345425
dr.is_year_start
426+
427+
428+
@given(
429+
dt=st.datetimes(min_value=datetime(1960, 1, 1), max_value=datetime(1980, 1, 1)),
430+
n=st.integers(min_value=1, max_value=10),
431+
freq=st.sampled_from(["MS", "QS", "YS"]),
432+
)
433+
@pytest.mark.slow
434+
def test_against_scalar_parametric(freq, dt, n):
435+
# https://github.com/pandas-dev/pandas/issues/49606
436+
freq = f"{n}{freq}"
437+
d = date_range(dt, periods=3, freq=freq)
438+
result = list(d.is_year_start)
439+
expected = [x.is_year_start for x in d]
440+
assert result == expected

0 commit comments

Comments
 (0)