Skip to content

ENH: Add isocalendar accessor to DatetimeIndex and Series.dt #33220

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Apr 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/source/user_guide/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,7 @@ There are several time/date properties that one can access from ``Timestamp`` or
week,"The week ordinal of the year"
dayofweek,"The number of the day of the week with Monday=0, Sunday=6"
weekday,"The number of the day of the week with Monday=0, Sunday=6"
isocalendar,"The ISO 8601 year, week and day of the date"
quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc."
days_in_month,"The number of days in the month of the datetime"
is_month_start,"Logical indicating if first day of month (defined by frequency)"
Expand All @@ -786,6 +787,15 @@ Furthermore, if you have a ``Series`` with datetimelike values, then you can
access these properties via the ``.dt`` accessor, as detailed in the section
on :ref:`.dt accessors<basics.dt_accessors>`.

.. versionadded:: 1.1.0

You may obtain the year, week and day components of the ISO 8601 calendar with the ``isocalendar`` accessor:

.. ipython:: python

idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
idx.to_series().dt.isocalendar

.. _timeseries.offsets:

DateOffset objects
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ Other enhancements
- :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`).
- :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`)
- :meth:`MultiIndex.union` will now raise `RuntimeWarning` if the object inside are unsortable, pass `sort=False` to suppress this warning (:issue:`33015`)
- :class:`Series.dt` and :class:`DatetimeIndex` now have an `isocalendar` accessor that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`).
- The :meth:`DataFrame.to_feather` method now supports additional keyword
arguments (e.g. to set the compression) that are added in pyarrow 0.17
(:issue:`33422`).
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/ccalendar.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ from cython cimport Py_ssize_t

from numpy cimport int64_t, int32_t

ctypedef (int32_t, int32_t, int32_t) iso_calendar_t

cdef int dayofweek(int y, int m, int d) nogil
cdef bint is_leapyear(int64_t year) nogil
cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil
cpdef int32_t get_week_of_year(int year, int month, int day) nogil
cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil
cpdef int32_t get_day_of_year(int year, int month, int day) nogil
54 changes: 43 additions & 11 deletions pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -150,33 +150,65 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil:
-------
week_of_year : int32_t

Notes
-----
Assumes the inputs describe a valid date.
"""
return get_iso_calendar(year, month, day)[1]


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil:
"""
Return the ISO 8601 year, week number, and day of the week (Monday=1, Sunday=7)

Parameters
----------
year : int
month : int
day : int

Returns
-------
year : int32_t
week : int32_t
day : int32_t

Notes
-----
Assumes the inputs describe a valid date.
"""
cdef:
int32_t doy, dow
int woy
int32_t iso_year, iso_week

doy = get_day_of_year(year, month, day)
dow = dayofweek(year, month, day)

# estimate
woy = (doy - 1) - dow + 3
if woy >= 0:
woy = woy // 7 + 1
iso_week = (doy - 1) - dow + 3
if iso_week >= 0:
iso_week = iso_week // 7 + 1

# verify
if woy < 0:
if (woy > -2) or (woy == -2 and is_leapyear(year - 1)):
woy = 53
if iso_week < 0:
if (iso_week > -2) or (iso_week == -2 and is_leapyear(year - 1)):
iso_week = 53
else:
woy = 52
elif woy == 53:
iso_week = 52
elif iso_week == 53:
if 31 - day + dow < 3:
woy = 1
iso_week = 1

iso_year = year
if iso_week == 1 and doy > 7:
iso_year += 1

elif iso_week >= 52 and doy < 7:
iso_year -= 1

return woy
return iso_year, iso_week, dow + 1


@cython.wraparound(False)
Expand Down
43 changes: 41 additions & 2 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ from cython import Py_ssize_t

import numpy as np
cimport numpy as cnp
from numpy cimport ndarray, int64_t, int32_t, int8_t
from numpy cimport ndarray, int64_t, int32_t, int8_t, uint32_t
cnp.import_array()

from pandas._libs.tslibs.ccalendar import (
get_locale_names, MONTHS_FULL, DAYS_FULL, DAY_SECONDS)
from pandas._libs.tslibs.ccalendar cimport (
get_days_in_month, is_leapyear, dayofweek, get_week_of_year,
get_day_of_year)
get_day_of_year, get_iso_calendar, iso_calendar_t)
from pandas._libs.tslibs.np_datetime cimport (
npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct,
td64_to_tdstruct)
Expand Down Expand Up @@ -670,3 +670,42 @@ cpdef isleapyear_arr(ndarray years):
np.logical_and(years % 4 == 0,
years % 100 > 0))] = 1
return out.view(bool)


@cython.wraparound(False)
@cython.boundscheck(False)
def build_isocalendar_sarray(const int64_t[:] dtindex):
    """
    Compute the ISO 8601 year, week, and day for an int64-based datetime
    array and return them as a structured array.

    The result has one record per input element, with uint32 fields
    "year", "week" and "day". Elements equal to NPY_NAT are written as 0
    in all three fields.
    """
    cdef:
        Py_ssize_t pos, n = len(dtindex)
        npy_datetimestruct dts
        ndarray[uint32_t] year_col, week_col, day_col
        iso_calendar_t iso

    out = np.empty(n, dtype=[("year", "u4"), ("week", "u4"), ("day", "u4")])

    # Per-field handles on ``out``; the loop below fills ``out`` through them.
    year_col = out["year"]
    week_col = out["week"]
    day_col = out["day"]

    with nogil:
        for pos in range(n):
            if dtindex[pos] != NPY_NAT:
                dt64_to_dtstruct(dtindex[pos], &dts)
                iso = get_iso_calendar(dts.year, dts.month, dts.day)
            else:
                # Placeholder record for missing values.
                iso = 0, 0, 0

            year_col[pos] = iso[0]
            week_col[pos] = iso[1]
            day_col[pos] = iso[2]
    return out
46 changes: 45 additions & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps
"microsecond",
"nanosecond",
]
_other_ops = ["date", "time", "timetz"]
_other_ops = ["date", "time", "timetz", "isocalendar"]
_datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops
_datetimelike_methods = [
"to_period",
Expand Down Expand Up @@ -1234,6 +1234,50 @@ def date(self):

return tslib.ints_to_pydatetime(timestamps, box="date")

@property
def isocalendar(self):
    """
    Year, week, and day of each element according to the ISO 8601 standard,
    returned as a DataFrame.

    .. versionadded:: 1.1.0

    Returns
    -------
    DataFrame
        with columns year, week and day

    See Also
    --------
    Timestamp.isocalendar
    datetime.date.isocalendar

    Examples
    --------
    >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
    >>> idx.isocalendar
       year  week  day
    0  2019    52    7
    1  2020     1    1
    2  2020     1    2
    3  2020     1    3
    >>> idx.isocalendar.week
    0    52
    1     1
    2     1
    3     1
    Name: week, dtype: UInt32
    """
    from pandas import DataFrame

    iso_fields = fields.build_isocalendar_sarray(self.asi8)
    result = DataFrame(iso_fields, columns=["year", "week", "day"], dtype="UInt32")
    if not self._hasnans:
        return result

    # Blank out the rows that correspond to missing datetimes.
    result.iloc[self._isnan] = None
    return result

year = _field_accessor(
"year",
"Y",
Expand Down
32 changes: 32 additions & 0 deletions pandas/core/indexes/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,38 @@ def to_pydatetime(self) -> np.ndarray:
def freq(self):
return self._get_values().inferred_freq

@property
def isocalendar(self):
    """
    Year, week, and day of each element according to the ISO 8601 standard,
    returned as a DataFrame aligned with the parent's index.

    .. versionadded:: 1.1.0

    Returns
    -------
    DataFrame
        with columns year, week and day

    See Also
    --------
    Timestamp.isocalendar
    datetime.date.isocalendar

    Examples
    --------
    >>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT]))
    >>> ser.dt.isocalendar
       year  week   day
    0  2009    53     5
    1  <NA>  <NA>  <NA>
    >>> ser.dt.isocalendar.week
    0      53
    1    <NA>
    Name: week, dtype: UInt32
    """
    iso_frame = self._get_values().isocalendar
    parent_index = self._parent.index
    # Re-label the rows with the index of the object the accessor wraps.
    return iso_frame.set_index(parent_index)


@delegate_names(
delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property"
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def _new_DatetimeIndex(cls, d):
"date",
"time",
"timetz",
"isocalendar",
]
+ DatetimeArray._bool_ops,
DatetimeArray,
Expand Down
20 changes: 19 additions & 1 deletion pandas/tests/series/test_datetime_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def get_expected(s, name):
if isinstance(result, np.ndarray):
if is_integer_dtype(result):
result = result.astype("int64")
elif not is_list_like(result):
elif not is_list_like(result) or isinstance(result, pd.DataFrame):
return result
return Series(result, index=s.index, name=s.name)

Expand All @@ -74,6 +74,8 @@ def compare(s, name):
b = get_expected(s, prop)
if not (is_list_like(a) and is_list_like(b)):
assert a == b
elif isinstance(a, pd.DataFrame):
tm.assert_frame_equal(a, b)
else:
tm.assert_series_equal(a, b)

Expand Down Expand Up @@ -665,3 +667,19 @@ def test_setitem_with_different_tz(self):
dtype=object,
)
tm.assert_series_equal(ser, expected)

@pytest.mark.parametrize(
    "input_series, expected_output",
    [
        [["2020-01-01"], [[2020, 1, 3]]],
        [[pd.NaT], [[np.nan, np.nan, np.nan]]],
        [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]],
        [["2010-01-01", pd.NaT], [[2009, 53, 5], [np.nan, np.nan, np.nan]]],
    ],
)
def test_isocalendar(self, input_series, expected_output):
    # Each input datetime should produce one [year, week, day] row; NaT
    # inputs should produce a row of missing values. ``np.nan`` is used
    # rather than the ``np.NaN`` alias, which NumPy 2.0 removed.
    result = pd.to_datetime(pd.Series(input_series)).dt.isocalendar
    expected_frame = pd.DataFrame(
        expected_output, columns=["year", "week", "day"], dtype="UInt32"
    )
    tm.assert_frame_equal(result, expected_frame)
25 changes: 24 additions & 1 deletion pandas/tests/tslibs/test_ccalendar.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import date, datetime

import numpy as np
import pytest
Expand All @@ -25,3 +25,26 @@ def test_get_day_of_year_dt():

expected = (dt - dt.replace(month=1, day=1)).days + 1
assert result == expected


@pytest.mark.parametrize(
    "input_date_tuple, expected_iso_tuple",
    [
        [(2020, 1, 1), (2020, 1, 3)],
        [(2019, 12, 31), (2020, 1, 2)],
        [(2019, 12, 30), (2020, 1, 1)],
        [(2009, 12, 31), (2009, 53, 4)],
        [(2010, 1, 1), (2009, 53, 5)],
        [(2010, 1, 3), (2009, 53, 7)],
        [(2010, 1, 4), (2010, 1, 1)],
        [(2006, 1, 1), (2005, 52, 7)],
        [(2005, 12, 31), (2005, 52, 6)],
        [(2008, 12, 28), (2008, 52, 7)],
        [(2008, 12, 29), (2009, 1, 1)],
    ],
)
def test_dt_correct_iso_8601_year_week_and_day(input_date_tuple, expected_iso_tuple):
    # The result is checked against the standard library first, then against
    # the hand-written expectation.
    year, month, day = input_date_tuple
    stdlib_iso = date(year, month, day).isocalendar()
    result = ccalendar.get_iso_calendar(year, month, day)
    assert result == stdlib_iso
    assert result == expected_iso_tuple