Skip to content

Backport PR #53795 on branch 2.0.x (BUG: fixes weekday for dates before 1752) #53884

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ including other versions of pandas.

Fixed regressions
~~~~~~~~~~~~~~~~~
- Bug in :meth:`Timestamp.weekday`` was returning incorrect results before ``'0000-02-29'`` (:issue:`53738`)
- Fixed performance regression in merging on datetime-like columns (:issue:`53231`)
- Fixed regression when :meth:`DataFrame.to_string` creates extra space for string dtypes (:issue:`52690`)
-
Expand Down
47 changes: 34 additions & 13 deletions pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"""
Cython implementations of functions resembling the stdlib calendar module
"""

cimport cython
from numpy cimport (
int32_t,
Expand All @@ -19,7 +18,7 @@ cdef int32_t* days_per_month_array = [
31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]
cdef int* em = [0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]

# The first 13 entries give the month days elapsed as of the first of month N
# (or the total number of days in the year for N=13) in non-leap years.
Expand Down Expand Up @@ -76,11 +75,22 @@ cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil:

@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision
cdef int dayofweek(int y, int m, int d) nogil:
@cython.cdivision(True)
cdef long quot(long a , long b) noexcept nogil:
cdef long x
x = a/b
if (a < 0):
x -= (a % b != 0)
return x


@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
cdef int dayofweek(int y, int m, int d) noexcept nogil:
"""
Find the day of week for the date described by the Y/M/D triple y, m, d
using Sakamoto's method, from wikipedia.
using Gauss' method, from wikipedia.

0 represents Monday. See [1]_.

Expand All @@ -103,16 +113,27 @@ cdef int dayofweek(int y, int m, int d) nogil:
[1] https://docs.python.org/3/library/calendar.html#calendar.weekday

[2] https://en.wikipedia.org/wiki/\
Determination_of_the_day_of_the_week#Sakamoto.27s_methods
Determination_of_the_day_of_the_week#Gauss's_algorithm
"""
# Note: this particular implementation comes from
# http://berndt-schwerdtfeger.de/wp-content/uploads/pdf/cal.pdf
cdef:
int day

y -= m < 3
day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7
# convert to python day
return (day + 6) % 7

long c
int g
int f
int e

if (m < 3):
y -= 1

c = quot(y, 100)
g = y - c * 100
f = 5 * (c - quot(c, 4) * 4)
e = em[m]

if (m > 2):
e -= 1
return (-1 + d + e + f + g + g/4) % 7

cdef bint is_leapyear(int64_t year) nogil:
"""
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
tzlocal,
tzutc,
)
from hypothesis import (
given,
strategies as st,
)
import numpy as np
import pytest
import pytz
Expand Down Expand Up @@ -223,6 +227,39 @@ def test_resolution(self):
assert dt.as_unit("ms").resolution == Timedelta(milliseconds=1)
assert dt.as_unit("s").resolution == Timedelta(seconds=1)

@pytest.mark.parametrize(
"date_string, expected",
[
("0000-2-29", 1),
("0000-3-1", 2),
("1582-10-14", 3),
("-0040-1-1", 4),
("2023-06-18", 6),
],
)
def test_dow_historic(self, date_string, expected):
# GH 53738
ts = Timestamp(date_string)
dow = ts.weekday()
assert dow == expected

@given(
ts=st.datetimes(),
sign=st.sampled_from(["-", ""]),
)
def test_dow_parametric(self, ts, sign):
# GH 53738
ts = (
f"{sign}{str(ts.year).zfill(4)}"
f"-{str(ts.month).zfill(2)}"
f"-{str(ts.day).zfill(2)}"
)
result = Timestamp(ts).weekday()
expected = (
(np.datetime64(ts) - np.datetime64("1970-01-01")).astype("int64") - 4
) % 7
assert result == expected


class TestTimestamp:
def test_default_to_stdlib_utc(self):
Expand Down