Skip to content

Commit 03b510c

Browse files
mgmarino and jreback authored
ENH: Add isocalendar accessor to DatetimeIndex and Series.dt (#33220)
* Add function to get_iso_calendar
  - This function reproduces what `datetime.date.isocalendar` returns
  - Refactor get_week_of_year to use get_iso_calendar internally
* Add fields function to build isocalendar array
  - Returns a structured numpy array with year, week, and day corresponding to the ISO 8601 calendar
* Add isocalendar property to DatetimeArray
  - Add corresponding access properties in accessors
  - This calculates the year, week, and day components according to the ISO 8601 calendar and returns them in a data frame.
  - It is analogous to Timestamp.isocalendar and datetime.date.isocalendar
* Fix dt tests to handle DataFrame output
* Update what's new for isocalendar
* Add columns explicitly when creating DataFrame
* Set fill value to a tuple of nan
  - Older versions of numpy otherwise throw an error:
    ```
    TypeError: a bytes-like object is required, not 'float'
    ```
* Define return of iso_calendar as ctypedef
* Use to_datetime instead of dtype="datetime64[D]"
* Fix doc string to be in row format
* Improve readability of ccalendar test
* Return Int64 dataframe when NaT present
* Clean up example to use a single Series
* Always return an Int64 data frame
* Return UInt32 per calendar ops standard
* Add timeseries documentation

Co-authored-by: Jeff Reback <[email protected]>
1 parent 12f9a10 commit 03b510c

File tree

10 files changed

+218
-16
lines changed

10 files changed

+218
-16
lines changed

doc/source/user_guide/timeseries.rst

+10
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,7 @@ There are several time/date properties that one can access from ``Timestamp`` or
772772
week,"The week ordinal of the year"
773773
dayofweek,"The number of the day of the week with Monday=0, Sunday=6"
774774
weekday,"The number of the day of the week with Monday=0, Sunday=6"
775+
isocalendar,"The ISO 8601 year, week and day of the date"
775776
quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc."
776777
days_in_month,"The number of days in the month of the datetime"
777778
is_month_start,"Logical indicating if first day of month (defined by frequency)"
@@ -786,6 +787,15 @@ Furthermore, if you have a ``Series`` with datetimelike values, then you can
786787
access these properties via the ``.dt`` accessor, as detailed in the section
787788
on :ref:`.dt accessors<basics.dt_accessors>`.
788789

790+
.. versionadded:: 1.1.0
791+
792+
You may obtain the year, week and day components of a date according to the ISO 8601 calendar:
793+
794+
.. ipython:: python
795+
796+
idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
797+
idx.to_series().dt.isocalendar
798+
789799
.. _timeseries.offsets:
790800

791801
DateOffset objects

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ Other enhancements
8888
- :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`).
8989
- :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`)
9090
- :meth:`MultiIndex.union` will now raise `RuntimeWarning` if the object inside are unsortable, pass `sort=False` to suppress this warning (:issue:`33015`)
91+
- :class:`Series.dt` and :class:`DatetimeIndex` now have an `isocalendar` accessor that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`).
9192
- The :meth:`DataFrame.to_feather` method now supports additional keyword
9293
arguments (e.g. to set the compression) that are added in pyarrow 0.17
9394
(:issue:`33422`).

pandas/_libs/tslibs/ccalendar.pxd

+2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@ from cython cimport Py_ssize_t
22

33
from numpy cimport int64_t, int32_t
44

5+
ctypedef (int32_t, int32_t, int32_t) iso_calendar_t
56

67
cdef int dayofweek(int y, int m, int d) nogil
78
cdef bint is_leapyear(int64_t year) nogil
89
cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil
910
cpdef int32_t get_week_of_year(int year, int month, int day) nogil
11+
cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil
1012
cpdef int32_t get_day_of_year(int year, int month, int day) nogil

pandas/_libs/tslibs/ccalendar.pyx

+43-11
Original file line numberDiff line numberDiff line change
@@ -150,33 +150,65 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil:
150150
-------
151151
week_of_year : int32_t
152152
153+
Notes
154+
-----
155+
Assumes the inputs describe a valid date.
156+
"""
157+
return get_iso_calendar(year, month, day)[1]
158+
159+
160+
@cython.wraparound(False)
161+
@cython.boundscheck(False)
162+
cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil:
163+
"""
164+
Return the year, week, and day of week corresponding to ISO 8601
165+
166+
Parameters
167+
----------
168+
year : int
169+
month : int
170+
day : int
171+
172+
Returns
173+
-------
174+
year : int32_t
175+
week : int32_t
176+
day : int32_t
177+
153178
Notes
154179
-----
155180
Assumes the inputs describe a valid date.
156181
"""
157182
cdef:
158183
int32_t doy, dow
159-
int woy
184+
int32_t iso_year, iso_week
160185

161186
doy = get_day_of_year(year, month, day)
162187
dow = dayofweek(year, month, day)
163188

164189
# estimate
165-
woy = (doy - 1) - dow + 3
166-
if woy >= 0:
167-
woy = woy // 7 + 1
190+
iso_week = (doy - 1) - dow + 3
191+
if iso_week >= 0:
192+
iso_week = iso_week // 7 + 1
168193

169194
# verify
170-
if woy < 0:
171-
if (woy > -2) or (woy == -2 and is_leapyear(year - 1)):
172-
woy = 53
195+
if iso_week < 0:
196+
if (iso_week > -2) or (iso_week == -2 and is_leapyear(year - 1)):
197+
iso_week = 53
173198
else:
174-
woy = 52
175-
elif woy == 53:
199+
iso_week = 52
200+
elif iso_week == 53:
176201
if 31 - day + dow < 3:
177-
woy = 1
202+
iso_week = 1
203+
204+
iso_year = year
205+
if iso_week == 1 and doy > 7:
206+
iso_year += 1
207+
208+
elif iso_week >= 52 and doy < 7:
209+
iso_year -= 1
178210

179-
return woy
211+
return iso_year, iso_week, dow + 1
180212

181213

182214
@cython.wraparound(False)

pandas/_libs/tslibs/fields.pyx

+41-2
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@ from cython import Py_ssize_t
88

99
import numpy as np
1010
cimport numpy as cnp
11-
from numpy cimport ndarray, int64_t, int32_t, int8_t
11+
from numpy cimport ndarray, int64_t, int32_t, int8_t, uint32_t
1212
cnp.import_array()
1313

1414
from pandas._libs.tslibs.ccalendar import (
1515
get_locale_names, MONTHS_FULL, DAYS_FULL, DAY_SECONDS)
1616
from pandas._libs.tslibs.ccalendar cimport (
1717
get_days_in_month, is_leapyear, dayofweek, get_week_of_year,
18-
get_day_of_year)
18+
get_day_of_year, get_iso_calendar, iso_calendar_t)
1919
from pandas._libs.tslibs.np_datetime cimport (
2020
npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct,
2121
td64_to_tdstruct)
@@ -670,3 +670,42 @@ cpdef isleapyear_arr(ndarray years):
670670
np.logical_and(years % 4 == 0,
671671
years % 100 > 0))] = 1
672672
return out.view(bool)
673+
674+
675+
@cython.wraparound(False)
676+
@cython.boundscheck(False)
677+
def build_isocalendar_sarray(const int64_t[:] dtindex):
678+
"""
679+
Given an int64-based datetime array, return the ISO 8601 year, week, and day
680+
as a structured array.
681+
"""
682+
cdef:
683+
Py_ssize_t i, count = len(dtindex)
684+
npy_datetimestruct dts
685+
ndarray[uint32_t] iso_years, iso_weeks, days
686+
iso_calendar_t ret_val
687+
688+
sa_dtype = [
689+
("year", "u4"),
690+
("week", "u4"),
691+
("day", "u4"),
692+
]
693+
694+
out = np.empty(count, dtype=sa_dtype)
695+
696+
iso_years = out["year"]
697+
iso_weeks = out["week"]
698+
days = out["day"]
699+
700+
with nogil:
701+
for i in range(count):
702+
if dtindex[i] == NPY_NAT:
703+
ret_val = 0, 0, 0
704+
else:
705+
dt64_to_dtstruct(dtindex[i], &dts)
706+
ret_val = get_iso_calendar(dts.year, dts.month, dts.day)
707+
708+
iso_years[i] = ret_val[0]
709+
iso_weeks[i] = ret_val[1]
710+
days[i] = ret_val[2]
711+
return out

pandas/core/arrays/datetimes.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps
182182
"microsecond",
183183
"nanosecond",
184184
]
185-
_other_ops = ["date", "time", "timetz"]
185+
_other_ops = ["date", "time", "timetz", "isocalendar"]
186186
_datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops
187187
_datetimelike_methods = [
188188
"to_period",
@@ -1234,6 +1234,50 @@ def date(self):
12341234

12351235
return tslib.ints_to_pydatetime(timestamps, box="date")
12361236

1237+
@property
1238+
def isocalendar(self):
1239+
"""
1240+
Returns a DataFrame with the year, week, and day calculated according to
1241+
the ISO 8601 standard.
1242+
1243+
.. versionadded:: 1.1.0
1244+
1245+
Returns
1246+
-------
1247+
DataFrame
1248+
with columns year, week and day
1249+
1250+
See Also
1251+
--------
1252+
Timestamp.isocalendar
1253+
datetime.date.isocalendar
1254+
1255+
Examples
1256+
--------
1257+
>>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
1258+
>>> idx.isocalendar
1259+
year week day
1260+
0 2019 52 7
1261+
1 2020 1 1
1262+
2 2020 1 2
1263+
3 2020 1 3
1264+
>>> idx.isocalendar.week
1265+
0 52
1266+
1 1
1267+
2 1
1268+
3 1
1269+
Name: week, dtype: UInt32
1270+
"""
1271+
from pandas import DataFrame
1272+
1273+
sarray = fields.build_isocalendar_sarray(self.asi8)
1274+
iso_calendar_df = DataFrame(
1275+
sarray, columns=["year", "week", "day"], dtype="UInt32"
1276+
)
1277+
if self._hasnans:
1278+
iso_calendar_df.iloc[self._isnan] = None
1279+
return iso_calendar_df
1280+
12371281
year = _field_accessor(
12381282
"year",
12391283
"Y",

pandas/core/indexes/accessors.py

+32
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,38 @@ def to_pydatetime(self) -> np.ndarray:
219219
def freq(self):
220220
return self._get_values().inferred_freq
221221

222+
@property
223+
def isocalendar(self):
224+
"""
225+
Returns a DataFrame with the year, week, and day calculated according to
226+
the ISO 8601 standard.
227+
228+
.. versionadded:: 1.1.0
229+
230+
Returns
231+
-------
232+
DataFrame
233+
with columns year, week and day
234+
235+
See Also
236+
--------
237+
Timestamp.isocalendar
238+
datetime.date.isocalendar
239+
240+
Examples
241+
--------
242+
>>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT]))
243+
>>> ser.dt.isocalendar
244+
year week day
245+
0 2009 53 5
246+
1 <NA> <NA> <NA>
247+
>>> ser.dt.isocalendar.week
248+
0 53
249+
1 <NA>
250+
Name: week, dtype: UInt32
251+
"""
252+
return self._get_values().isocalendar.set_index(self._parent.index)
253+
222254

223255
@delegate_names(
224256
delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property"

pandas/core/indexes/datetimes.py

+1
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def _new_DatetimeIndex(cls, d):
8989
"date",
9090
"time",
9191
"timetz",
92+
"isocalendar",
9293
]
9394
+ DatetimeArray._bool_ops,
9495
DatetimeArray,

pandas/tests/series/test_datetime_values.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def get_expected(s, name):
6565
if isinstance(result, np.ndarray):
6666
if is_integer_dtype(result):
6767
result = result.astype("int64")
68-
elif not is_list_like(result):
68+
elif not is_list_like(result) or isinstance(result, pd.DataFrame):
6969
return result
7070
return Series(result, index=s.index, name=s.name)
7171

@@ -74,6 +74,8 @@ def compare(s, name):
7474
b = get_expected(s, prop)
7575
if not (is_list_like(a) and is_list_like(b)):
7676
assert a == b
77+
elif isinstance(a, pd.DataFrame):
78+
tm.assert_frame_equal(a, b)
7779
else:
7880
tm.assert_series_equal(a, b)
7981

@@ -665,3 +667,19 @@ def test_setitem_with_different_tz(self):
665667
dtype=object,
666668
)
667669
tm.assert_series_equal(ser, expected)
670+
671+
@pytest.mark.parametrize(
672+
"input_series, expected_output",
673+
[
674+
[["2020-01-01"], [[2020, 1, 3]]],
675+
[[pd.NaT], [[np.NaN, np.NaN, np.NaN]]],
676+
[["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]],
677+
[["2010-01-01", pd.NaT], [[2009, 53, 5], [np.NaN, np.NaN, np.NaN]]],
678+
],
679+
)
680+
def test_isocalendar(self, input_series, expected_output):
681+
result = pd.to_datetime(pd.Series(input_series)).dt.isocalendar
682+
expected_frame = pd.DataFrame(
683+
expected_output, columns=["year", "week", "day"], dtype="UInt32"
684+
)
685+
tm.assert_frame_equal(result, expected_frame)

pandas/tests/tslibs/test_ccalendar.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime
1+
from datetime import date, datetime
22

33
import numpy as np
44
import pytest
@@ -25,3 +25,26 @@ def test_get_day_of_year_dt():
2525

2626
expected = (dt - dt.replace(month=1, day=1)).days + 1
2727
assert result == expected
28+
29+
30+
@pytest.mark.parametrize(
31+
"input_date_tuple, expected_iso_tuple",
32+
[
33+
[(2020, 1, 1), (2020, 1, 3)],
34+
[(2019, 12, 31), (2020, 1, 2)],
35+
[(2019, 12, 30), (2020, 1, 1)],
36+
[(2009, 12, 31), (2009, 53, 4)],
37+
[(2010, 1, 1), (2009, 53, 5)],
38+
[(2010, 1, 3), (2009, 53, 7)],
39+
[(2010, 1, 4), (2010, 1, 1)],
40+
[(2006, 1, 1), (2005, 52, 7)],
41+
[(2005, 12, 31), (2005, 52, 6)],
42+
[(2008, 12, 28), (2008, 52, 7)],
43+
[(2008, 12, 29), (2009, 1, 1)],
44+
],
45+
)
46+
def test_dt_correct_iso_8601_year_week_and_day(input_date_tuple, expected_iso_tuple):
47+
result = ccalendar.get_iso_calendar(*input_date_tuple)
48+
expected_from_date_isocalendar = date(*input_date_tuple).isocalendar()
49+
assert result == expected_from_date_isocalendar
50+
assert result == expected_iso_tuple

0 commit comments

Comments
 (0)