Skip to content

Commit b8f14c4

Browse files
Backport PR pandas-dev#53795 on branch 2.0.x (BUG: fixes weekday for dates before 1752) (pandas-dev#53884)
Backport PR pandas-dev#53795: BUG: fixes weekday for dates before 1752 Co-authored-by: Conrad Mcgee Stocks <[email protected]>
1 parent 7d8be44 commit b8f14c4

File tree

3 files changed

+72
-13
lines changed

3 files changed

+72
-13
lines changed

doc/source/whatsnew/v2.0.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16+
- Bug in :meth:`Timestamp.weekday` was returning incorrect results before ``'0000-02-29'`` (:issue:`53738`)
1617
- Fixed performance regression in merging on datetime-like columns (:issue:`53231`)
1718
- Fixed regression when :meth:`DataFrame.to_string` creates extra space for string dtypes (:issue:`52690`)
1819
-

pandas/_libs/tslibs/ccalendar.pyx

+34-13
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
"""
33
Cython implementations of functions resembling the stdlib calendar module
44
"""
5-
65
cimport cython
76
from numpy cimport (
87
int32_t,
@@ -19,7 +18,7 @@ cdef int32_t* days_per_month_array = [
1918
31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
2019
31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
2120

22-
cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]
21+
cdef int* em = [0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
2322

2423
# The first 13 entries give the month days elapsed as of the first of month N
2524
# (or the total number of days in the year for N=13) in non-leap years.
@@ -76,11 +75,22 @@ cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil:
7675

7776
@cython.wraparound(False)
7877
@cython.boundscheck(False)
79-
@cython.cdivision
80-
cdef int dayofweek(int y, int m, int d) nogil:
78+
@cython.cdivision(True)
79+
cdef long quot(long a , long b) noexcept nogil:
80+
cdef long x
81+
x = a/b
82+
if (a < 0):
83+
x -= (a % b != 0)
84+
return x
85+
86+
87+
@cython.wraparound(False)
88+
@cython.boundscheck(False)
89+
@cython.cdivision(True)
90+
cdef int dayofweek(int y, int m, int d) noexcept nogil:
8191
"""
8292
Find the day of week for the date described by the Y/M/D triple y, m, d
83-
using Sakamoto's method, from wikipedia.
93+
using Gauss's algorithm, from Wikipedia.
8494
8595
0 represents Monday. See [1]_.
8696
@@ -103,16 +113,27 @@ cdef int dayofweek(int y, int m, int d) nogil:
103113
[1] https://docs.python.org/3/library/calendar.html#calendar.weekday
104114
105115
[2] https://en.wikipedia.org/wiki/\
106-
Determination_of_the_day_of_the_week#Sakamoto.27s_methods
116+
Determination_of_the_day_of_the_week#Gauss's_algorithm
107117
"""
118+
# Note: this particular implementation comes from
119+
# http://berndt-schwerdtfeger.de/wp-content/uploads/pdf/cal.pdf
108120
cdef:
109-
int day
110-
111-
y -= m < 3
112-
day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7
113-
# convert to python day
114-
return (day + 6) % 7
115-
121+
long c
122+
int g
123+
int f
124+
int e
125+
126+
if (m < 3):
127+
y -= 1
128+
129+
c = quot(y, 100)
130+
g = y - c * 100
131+
f = 5 * (c - quot(c, 4) * 4)
132+
e = em[m]
133+
134+
if (m > 2):
135+
e -= 1
136+
return (-1 + d + e + f + g + g/4) % 7
116137

117138
cdef bint is_leapyear(int64_t year) nogil:
118139
"""

pandas/tests/scalar/timestamp/test_timestamp.py

+37
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
tzlocal,
1515
tzutc,
1616
)
17+
from hypothesis import (
18+
given,
19+
strategies as st,
20+
)
1721
import numpy as np
1822
import pytest
1923
import pytz
@@ -223,6 +227,39 @@ def test_resolution(self):
223227
assert dt.as_unit("ms").resolution == Timedelta(milliseconds=1)
224228
assert dt.as_unit("s").resolution == Timedelta(seconds=1)
225229

230+
@pytest.mark.parametrize(
231+
"date_string, expected",
232+
[
233+
("0000-2-29", 1),
234+
("0000-3-1", 2),
235+
("1582-10-14", 3),
236+
("-0040-1-1", 4),
237+
("2023-06-18", 6),
238+
],
239+
)
240+
def test_dow_historic(self, date_string, expected):
241+
# GH 53738
242+
ts = Timestamp(date_string)
243+
dow = ts.weekday()
244+
assert dow == expected
245+
246+
@given(
247+
ts=st.datetimes(),
248+
sign=st.sampled_from(["-", ""]),
249+
)
250+
def test_dow_parametric(self, ts, sign):
251+
# GH 53738
252+
ts = (
253+
f"{sign}{str(ts.year).zfill(4)}"
254+
f"-{str(ts.month).zfill(2)}"
255+
f"-{str(ts.day).zfill(2)}"
256+
)
257+
result = Timestamp(ts).weekday()
258+
expected = (
259+
(np.datetime64(ts) - np.datetime64("1970-01-01")).astype("int64") - 4
260+
) % 7
261+
assert result == expected
262+
226263

227264
class TestTimestamp:
228265
def test_default_to_stdlib_utc(self):

0 commit comments

Comments
 (0)