Skip to content

BUG: fixes weekday for dates before 1752 #53795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 27, 2023
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -364,9 +364,11 @@ Datetimelike
- Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`)
- Bug in :meth:`Timestamp.date`, :meth:`Timestamp.isocalendar`, :meth:`Timestamp.timetuple`, and :meth:`Timestamp.toordinal` returning incorrect results for inputs outside those supported by the Python standard library's datetime module (:issue:`53668`)
- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`)
- Bug in :meth:`Timestamp.weekday` returning an incorrect day of the week for dates before the year 1752 (:issue:`53738`)
- Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`)
- Bug in constructing a :class:`Series` or :class:`DataFrame` from a datetime or timedelta scalar always inferring nanosecond resolution instead of inferring from the input (:issue:`52212`)
- Bug in parsing datetime strings with weekday but no day e.g. "2023 Sept Thu" incorrectly raising ``AttributeError`` instead of ``ValueError`` (:issue:`52659`)
-

Timedelta
^^^^^^^^^
Expand Down
43 changes: 31 additions & 12 deletions pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"""
Cython implementations of functions resembling the stdlib calendar module
"""

cimport cython
from numpy cimport (
int32_t,
Expand All @@ -19,7 +18,7 @@ cdef int32_t* days_per_month_array = [
31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]
# em[m] is the number of days elapsed before the first day of month m
# (m = 1..12) in a non-leap year; the leading 0 at index 0 is padding so the
# table can be indexed directly with a 1-based month number.
cdef int* em = [0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]

# The first 13 entries give the month days elapsed as of the first of month N
# (or the total number of days in the year for N=13) in non-leap years.
Expand Down Expand Up @@ -76,11 +75,22 @@ cpdef int32_t get_days_in_month(int year, Py_ssize_t month) noexcept nogil:

@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision
@cython.cdivision(True)
cdef long quot(long a, long b) noexcept nogil:
    """
    Return the floor of ``a / b`` for a positive divisor ``b``.

    The function is compiled with ``cdivision(True)``, so ``/`` and ``%``
    follow C semantics: the quotient truncates toward zero and the remainder
    takes the sign of ``a``.  For a negative ``a`` that is not an exact
    multiple of ``b`` the truncated quotient is therefore one larger than the
    floor, and is decremented here.

    Parameters
    ----------
    a : long
        Dividend; may be negative.
    b : long
        Divisor.  The adjustment below is only correct for ``b > 0``.

    Returns
    -------
    long
    """
    cdef long q = a / b

    # Truncation already equals the floor unless the dividend is negative
    # and the division left a remainder.
    if a < 0 and a % b != 0:
        q -= 1
    return q


@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
cdef int dayofweek(int y, int m, int d) noexcept nogil:
"""
Find the day of week for the date described by the Y/M/D triple y, m, d
using Sakamoto's method, from wikipedia.
using Gauss' method, from wikipedia.

0 represents Monday. See [1]_.

Expand All @@ -103,16 +113,25 @@ cdef int dayofweek(int y, int m, int d) noexcept nogil:
[1] https://docs.python.org/3/library/calendar.html#calendar.weekday

[2] https://en.wikipedia.org/wiki/\
Determination_of_the_day_of_the_week#Sakamoto.27s_methods
Determination_of_the_day_of_the_week#Gauss's_algorithm
"""
cdef:
int day

y -= m < 3
day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7
# convert to python day
return (day + 6) % 7

long c
int g
int f
int e

if (m < 3):
y -= 1

c = quot(y, 100)
g = y - quot(y, 100) * 100
f = 5 * (c - quot(c, 4) * 4)
e = em[m]

if (m > 2):
e -= 1
return (-1 + d + e + f + g + g/4) % 7

cdef bint is_leapyear(int64_t year) noexcept nogil:
"""
Expand Down
29 changes: 29 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
tzlocal,
tzutc,
)
from hypothesis import (
given,
strategies as st,
)
import numpy as np
import pytest
import pytz
Expand Down Expand Up @@ -223,6 +227,31 @@ def test_resolution(self):
assert dt.as_unit("ms").resolution == Timedelta(milliseconds=1)
assert dt.as_unit("s").resolution == Timedelta(seconds=1)

@pytest.mark.parametrize(
    "date_string, expected_true, expected_false",
    [
        ("0000-2-29", 1, 2),
        ("0000-3-1", 2, 3),
        ("1582-10-14", 3, 4),
        ("1582-10-15", 4, 5),
        ("1752-01-01", 5, 6),
        ("2023-06-18", 6, 0),
    ],
)
def test_dow_historic(self, date_string, expected_true, expected_false):
    """
    Check ``Timestamp.weekday`` on fixed historic and modern dates.

    ``expected_true`` is the weekday (0 = Monday) the date must map to;
    ``expected_false`` is a neighbouring value it must not map to.
    """
    # GH 53738
    weekday = Timestamp(date_string).weekday()
    assert weekday == expected_true
    assert weekday != expected_false

@given(st.datetimes())
def test_dow_sanity(self, dt):
    """
    Property test: ``Timestamp.weekday`` agrees with the standard library's
    ``datetime.weekday`` for every generated datetime.
    """
    # GH 53738
    stdlib_weekday = datetime(dt.year, dt.month, dt.day).weekday()
    pandas_weekday = Timestamp(dt).weekday()
    assert pandas_weekday == stdlib_weekday


class TestTimestamp:
def test_default_to_stdlib_utc(self):
Expand Down