From d2edb1181f239f879e558100c64943094be14b06 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Wed, 1 Apr 2020 22:26:02 +0200 Subject: [PATCH 01/16] Add function to get_iso_calendar - This function reproduces what `datetime.date.isocalendar` returns - Refactor get_week_of_year to use get_iso_calendar internally --- pandas/_libs/tslibs/ccalendar.pxd | 1 + pandas/_libs/tslibs/ccalendar.pyx | 52 +++++++++++++++++++++------ pandas/tests/tslibs/test_ccalendar.py | 26 +++++++++++++- 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 59ecaaaf2266e..5d37173dbc1fd 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -7,4 +7,5 @@ cdef int dayofweek(int y, int m, int d) nogil cdef bint is_leapyear(int64_t year) nogil cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil cpdef int32_t get_week_of_year(int year, int month, int day) nogil +cpdef (int32_t, int32_t, int32_t) get_iso_calendar(int year, int month, int day) nogil cpdef int32_t get_day_of_year(int year, int month, int day) nogil diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 0588dfe20e2e2..1c4a83ac0c2a8 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -150,33 +150,63 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil: ------- week_of_year : int32_t + Notes + ----- + Assumes the inputs describe a valid date. + """ + return get_iso_calendar(year, month, day)[1] + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef (int32_t, int32_t, int32_t) get_iso_calendar(int year, int month, int day) nogil: + """ + Return the year, week, and day of year corresponding to ISO 8601 + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + (year : int32_t, week : int32_t, day : int32_t) + Notes ----- Assumes the inputs describe a valid date. 
""" cdef: int32_t doy, dow - int woy + int32_t iso_year, iso_week doy = get_day_of_year(year, month, day) dow = dayofweek(year, month, day) # estimate - woy = (doy - 1) - dow + 3 - if woy >= 0: - woy = woy // 7 + 1 + iso_week = (doy - 1) - dow + 3 + if iso_week >= 0: + iso_week = iso_week // 7 + 1 # verify - if woy < 0: - if (woy > -2) or (woy == -2 and is_leapyear(year - 1)): - woy = 53 + if iso_week < 0: + if (iso_week > -2) or (iso_week == -2 and is_leapyear(year - 1)): + iso_week = 53 else: - woy = 52 - elif woy == 53: + iso_week = 52 + elif iso_week == 53: if 31 - day + dow < 3: - woy = 1 + iso_week = 1 + + iso_year = year + if iso_week == 1 and doy > 7: + iso_year += 1 + + elif iso_week >= 52 and doy < 7: + iso_year -= 1 - return woy + return iso_year, iso_week, dow + 1 @cython.wraparound(False) diff --git a/pandas/tests/tslibs/test_ccalendar.py b/pandas/tests/tslibs/test_ccalendar.py index 6f6e32411a784..2471bcb28ebb0 100644 --- a/pandas/tests/tslibs/test_ccalendar.py +++ b/pandas/tests/tslibs/test_ccalendar.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import date, datetime import numpy as np import pytest @@ -25,3 +25,27 @@ def test_get_day_of_year_dt(): expected = (dt - dt.replace(month=1, day=1)).days + 1 assert result == expected + + +@pytest.mark.parametrize( + "input_date_tuple, expected_iso_tuple", + [ + [(2020, 1, 1), (2020, 1, 3)], + [(2019, 12, 31), (2020, 1, 2)], + [(2019, 12, 30), (2020, 1, 1)], + [(2009, 12, 31), (2009, 53, 4)], + [(2010, 1, 1), (2009, 53, 5)], + [(2010, 1, 3), (2009, 53, 7)], + [(2010, 1, 4), (2010, 1, 1)], + [(2006, 1, 1), (2005, 52, 7)], + [(2005, 12, 31), (2005, 52, 6)], + [(2008, 12, 28), (2008, 52, 7)], + [(2008, 12, 29), (2009, 1, 1)], + ], +) +def test_dt_correct_iso_8601_year_week_and_day(input_date_tuple, expected_iso_tuple): + assert ( + ccalendar.get_iso_calendar(*input_date_tuple) + == date(*input_date_tuple).isocalendar() + ) + assert ccalendar.get_iso_calendar(*input_date_tuple) == 
expected_iso_tuple From 7e807a6f8c2fbb5a454db07a5d13e63f969baa60 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Thu, 2 Apr 2020 23:19:59 +0200 Subject: [PATCH 02/16] Add fields function to build isocalendar array - Returns a structured numpy array with year, week, and day corresponding to the ISO 8601 calendar --- pandas/_libs/tslibs/fields.pyx | 41 +++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 50b7fba67e78f..24366710e3eb5 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -15,7 +15,7 @@ from pandas._libs.tslibs.ccalendar import ( get_locale_names, MONTHS_FULL, DAYS_FULL, DAY_SECONDS) from pandas._libs.tslibs.ccalendar cimport ( get_days_in_month, is_leapyear, dayofweek, get_week_of_year, - get_day_of_year) + get_day_of_year, get_iso_calendar) from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct) @@ -670,3 +670,42 @@ cpdef isleapyear_arr(ndarray years): np.logical_and(years % 4 == 0, years % 100 > 0))] = 1 return out.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def build_isocalendar_sarray(const int64_t[:] dtindex): + """ + Given a int64-based datetime array, return the ISO 8601 year, week, and day + as a structured array. 
+ """ + cdef: + Py_ssize_t i, count = len(dtindex) + npy_datetimestruct dts + ndarray[int32_t] iso_years, iso_weeks, days + (int32_t, int32_t, int32_t) ret_val + + sa_dtype = [ + ("year", "i4"), + ("week", "i4"), + ("day", "i4"), + ] + + out = np.empty(count, dtype=sa_dtype) + + iso_years = out["year"] + iso_weeks = out["week"] + days = out["day"] + + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + ret_val = -1, -1, -1 + else: + dt64_to_dtstruct(dtindex[i], &dts) + ret_val = get_iso_calendar(dts.year, dts.month, dts.day) + + iso_years[i] = ret_val[0] + iso_weeks[i] = ret_val[1] + days[i] = ret_val[2] + return out From 3fd4cfb63d903ea37e6ce9c88522820fc365f0c6 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Fri, 3 Apr 2020 22:21:43 +0200 Subject: [PATCH 03/16] Add isocalendar property to DatetimeArray - Add corresponding access properties in accessors - This calculates the year, week, and day components according to the ISO 8601 calendar and returns them in a data frame. 
- It is analogous to Timestamp.isocalendar and datetime.date.isocalendar --- pandas/core/arrays/datetimes.py | 44 ++++++++++++++++++++- pandas/core/indexes/accessors.py | 34 ++++++++++++++++ pandas/core/indexes/datetimes.py | 1 + pandas/tests/series/test_datetime_values.py | 20 ++++++++++ 4 files changed, 98 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b9f9edcebad5b..8470f82c64664 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -182,7 +182,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps "microsecond", "nanosecond", ] - _other_ops = ["date", "time", "timetz"] + _other_ops = ["date", "time", "timetz", "isocalendar"] _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops _datetimelike_methods = [ "to_period", @@ -1234,6 +1234,48 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") + @property + def isocalendar(self): + """ + Returns a DataFrame with the year, week, and day calculated according to + the ISO 8601 standard. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame + with columns year, week and day + + See Also + -------- + Timestamp.isocalendar + datetime.date.isocalendar + + Examples + -------- + >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4) + >>> idx.isocalendar + year week day + 0 2019 52 7 + 1 2020 1 1 + 2 2020 1 2 + 3 2020 1 3 + >>> idx.isocalendar.week + 0 52 + 1 1 + 2 1 + 3 1 + Name: week, dtype: int32 + """ + from pandas import DataFrame + + sarray = fields.build_isocalendar_sarray(self.asi8) + iso_calendar_array = self._maybe_mask_results( + sarray, fill_value=np.nan, convert=[(n, " np.ndarray: def freq(self): return self._get_values().inferred_freq + @property + def isocalendar(self): + """ + Returns a DataFrame with the year, week, and day calculated according to + the ISO 8601 standard. + + .. 
versionadded:: 1.1.0 + + Returns + ------- + DataFrame + with columns year, week and day + + See Also + -------- + Timestamp.isocalendar + datetime.date.isocalendar + + Examples + -------- + >>> pd.Series(["2020-01-01"], dtype="datetime64[D]").dt.isocalendar + year week day + 0 2020 1 3 + >>> ser = pd.Series(["2010-01-01", pd.NaT], dtype="datetime64[D]") + >>> ser.dt.isocalendar + year week day + 0 2009.0 53.0 5.0 + 1 NaN NaN NaN + >>> pd.Series(["2019-12-31"], dtype="datetime64[D]").dt.isocalendar.week + 0 1 + Name: week, dtype: int32 + """ + return self._get_values().isocalendar.set_index(self._parent.index) + @delegate_names( delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property" diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 68d6229e798f5..1ec6cf8fd7b4e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -89,6 +89,7 @@ def _new_DatetimeIndex(cls, d): "date", "time", "timetz", + "isocalendar", ] + DatetimeArray._bool_ops, DatetimeArray, diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index d22dc72eaaadd..e0715a7596d73 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -665,3 +665,23 @@ def test_setitem_with_different_tz(self): dtype=object, ) tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize( + "input_series, expected_output, expected_type", + [ + [["2020-01-01"], [[2020, 1, 3]], "int32"], + [[pd.NaT], [[np.NaN, np.NaN, np.NaN]], "float64"], + [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]], "int32"], + [ + ["2010-01-01", pd.NaT], + [[2009, 53, 5], [np.NaN, np.NaN, np.NaN]], + "float64", + ], + ], + ) + def test_isocalendar(self, input_series, expected_output, expected_type): + ser = pd.Series(input_series, dtype="datetime64[D]") + expected_frame = pd.DataFrame( + expected_output, columns=["year", "week", "day"] + 
).astype(expected_type) + tm.assert_frame_equal(ser.dt.isocalendar, expected_frame) From 4a32f4ac5c97eac912640980bffcf81328902c7f Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Fri, 3 Apr 2020 22:30:42 +0200 Subject: [PATCH 04/16] Fix dt tests to handle DataFrame output --- pandas/tests/series/test_datetime_values.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index e0715a7596d73..70518b80e27bc 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -65,7 +65,7 @@ def get_expected(s, name): if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype("int64") - elif not is_list_like(result): + elif not is_list_like(result) or isinstance(result, pd.DataFrame): return result return Series(result, index=s.index, name=s.name) @@ -74,6 +74,8 @@ def compare(s, name): b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b + elif isinstance(a, pd.DataFrame): + tm.assert_frame_equal(a, b) else: tm.assert_series_equal(a, b) From e994788f9cbba09f6ecf5496bc355df5ae7d894a Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Fri, 3 Apr 2020 22:35:24 +0200 Subject: [PATCH 05/16] =?UTF-8?q?Update=20what=E2=80=99s=20new=20for=20iso?= =?UTF-8?q?calendar?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 97a7f22df3985..dd180b5e2a8b4 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -88,6 +88,7 @@ Other enhancements - :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`). 
- :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`) - :meth:`MultiIndex.union` will now raise `RuntimeWarning` if the object inside are unsortable, pass `sort=False` to suppress this warning (:issue:`33015`) +- :class:`Series.dt` and :class:`DatetimeIndex` now have an `isocalendar` accessor that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`). - .. --------------------------------------------------------------------------- From 93d093e168c625bdb5b720cfe374b0e508f09acc Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Sat, 4 Apr 2020 09:06:22 +0200 Subject: [PATCH 06/16] Add columns explicitly when creating DataFrame --- pandas/core/arrays/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8470f82c64664..e5589f4e155f8 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1274,7 +1274,7 @@ def isocalendar(self): iso_calendar_array = self._maybe_mask_results( sarray, fill_value=np.nan, convert=[(n, " Date: Sat, 4 Apr 2020 09:22:49 +0200 Subject: [PATCH 07/16] Set fill value to a tuple of nan - Older versions of numpy otherwise throw an error: ``` TypeError: a bytes-like object is required, not 'float' ``` --- pandas/core/arrays/datetimes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e5589f4e155f8..f6afa8e2de706 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1272,7 +1272,9 @@ def isocalendar(self): sarray = fields.build_isocalendar_sarray(self.asi8) iso_calendar_array = self._maybe_mask_results( - sarray, fill_value=np.nan, convert=[(n, " Date: Sat, 4 Apr 2020 20:31:59 +0200 Subject: [PATCH 08/16] Define return of iso_calendar as ctypedef --- 
pandas/_libs/tslibs/ccalendar.pxd | 3 ++- pandas/_libs/tslibs/ccalendar.pyx | 2 +- pandas/_libs/tslibs/fields.pyx | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 5d37173dbc1fd..68ad1d1e68133 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -2,10 +2,11 @@ from cython cimport Py_ssize_t from numpy cimport int64_t, int32_t +ctypedef (int32_t, int32_t, int32_t) iso_calendar_t cdef int dayofweek(int y, int m, int d) nogil cdef bint is_leapyear(int64_t year) nogil cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil cpdef int32_t get_week_of_year(int year, int month, int day) nogil -cpdef (int32_t, int32_t, int32_t) get_iso_calendar(int year, int month, int day) nogil +cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil cpdef int32_t get_day_of_year(int year, int month, int day) nogil diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 1c4a83ac0c2a8..058712d123b69 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -159,7 +159,7 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil: @cython.wraparound(False) @cython.boundscheck(False) -cpdef (int32_t, int32_t, int32_t) get_iso_calendar(int year, int month, int day) nogil: +cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil: """ Return the year, week, and day of year corresponding to ISO 8601 diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 24366710e3eb5..ba122eac8d3e4 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -15,7 +15,7 @@ from pandas._libs.tslibs.ccalendar import ( get_locale_names, MONTHS_FULL, DAYS_FULL, DAY_SECONDS) from pandas._libs.tslibs.ccalendar cimport ( get_days_in_month, is_leapyear, dayofweek, get_week_of_year, - get_day_of_year, get_iso_calendar) + 
get_day_of_year, get_iso_calendar, iso_calendar_t) from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct) @@ -683,7 +683,7 @@ def build_isocalendar_sarray(const int64_t[:] dtindex): Py_ssize_t i, count = len(dtindex) npy_datetimestruct dts ndarray[int32_t] iso_years, iso_weeks, days - (int32_t, int32_t, int32_t) ret_val + iso_calendar_t ret_val sa_dtype = [ ("year", "i4"), From 7865a70885136f423fdb64a03cd22e77fda78eb0 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Mon, 6 Apr 2020 08:05:08 +0200 Subject: [PATCH 09/16] =?UTF-8?q?Use=20to=5Fdatetime=20instead=20of=20dtyp?= =?UTF-8?q?e=3D=E2=80=9Cdatetime64[D]=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/core/indexes/accessors.py | 6 +++--- pandas/tests/series/test_datetime_values.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index afc3079a8f49c..cc59c9f0d029a 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -239,15 +239,15 @@ def isocalendar(self): Examples -------- - >>> pd.Series(["2020-01-01"], dtype="datetime64[D]").dt.isocalendar + >>> pd.to_datetime(pd.Series(["2020-01-01"])).dt.isocalendar year week day 0 2020 1 3 - >>> ser = pd.Series(["2010-01-01", pd.NaT], dtype="datetime64[D]") + >>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT])) >>> ser.dt.isocalendar year week day 0 2009.0 53.0 5.0 1 NaN NaN NaN - >>> pd.Series(["2019-12-31"], dtype="datetime64[D]").dt.isocalendar.week + >>> pd.to_datetime(pd.Series(["2019-12-31"])).dt.isocalendar.week 0 1 Name: week, dtype: int32 """ diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 70518b80e27bc..d276d0b2f72bd 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -682,7 +682,7 
@@ def test_setitem_with_different_tz(self): ], ) def test_isocalendar(self, input_series, expected_output, expected_type): - ser = pd.Series(input_series, dtype="datetime64[D]") + ser = pd.to_datetime(pd.Series(input_series)) expected_frame = pd.DataFrame( expected_output, columns=["year", "week", "day"] ).astype(expected_type) From 83e30058e472448ae1a44dd9024e1ef9b2ce8645 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Mon, 6 Apr 2020 21:32:32 +0200 Subject: [PATCH 10/16] Fix doc string to be in row format --- pandas/_libs/tslibs/ccalendar.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 058712d123b69..0873084d29555 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -171,7 +171,9 @@ cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil: Returns ------- - (year : int32_t, week : int32_t, day : int32_t) + year : int32_t + week : int32_t + day : int32_t Notes ----- From f0d1ae62f40f6abf3b5c85227d4a5e1f97508769 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Mon, 6 Apr 2020 21:32:58 +0200 Subject: [PATCH 11/16] Improve readability of ccalendar test --- pandas/tests/tslibs/test_ccalendar.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/tslibs/test_ccalendar.py b/pandas/tests/tslibs/test_ccalendar.py index 2471bcb28ebb0..aab86d3a2df69 100644 --- a/pandas/tests/tslibs/test_ccalendar.py +++ b/pandas/tests/tslibs/test_ccalendar.py @@ -44,8 +44,7 @@ def test_get_day_of_year_dt(): ], ) def test_dt_correct_iso_8601_year_week_and_day(input_date_tuple, expected_iso_tuple): - assert ( - ccalendar.get_iso_calendar(*input_date_tuple) - == date(*input_date_tuple).isocalendar() - ) - assert ccalendar.get_iso_calendar(*input_date_tuple) == expected_iso_tuple + result = ccalendar.get_iso_calendar(*input_date_tuple) + expected_from_date_isocalendar = 
date(*input_date_tuple).isocalendar() + assert result == expected_from_date_isocalendar + assert result == expected_iso_tuple From ff7b7e6b1fc4ef2554e1962d1c379392ed46e454 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Mon, 6 Apr 2020 21:37:00 +0200 Subject: [PATCH 12/16] Return Int64 dataframe when NaT present --- pandas/core/arrays/datetimes.py | 11 +++++------ pandas/tests/series/test_datetime_values.py | 8 ++++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f6afa8e2de706..2b8dc4b684185 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1271,12 +1271,11 @@ def isocalendar(self): from pandas import DataFrame sarray = fields.build_isocalendar_sarray(self.asi8) - iso_calendar_array = self._maybe_mask_results( - sarray, - fill_value=(np.nan, np.nan, np.nan), - convert=[(n, " Date: Mon, 6 Apr 2020 21:37:54 +0200 Subject: [PATCH 13/16] Clean up example to use a single Series --- pandas/core/indexes/accessors.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index cc59c9f0d029a..33f18bd9e17cd 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -239,17 +239,15 @@ def isocalendar(self): Examples -------- - >>> pd.to_datetime(pd.Series(["2020-01-01"])).dt.isocalendar - year week day - 0 2020 1 3 >>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT])) >>> ser.dt.isocalendar - year week day - 0 2009.0 53.0 5.0 - 1 NaN NaN NaN - >>> pd.to_datetime(pd.Series(["2019-12-31"])).dt.isocalendar.week - 0 1 - Name: week, dtype: int32 + year week day + 0 2009 53 5 + 1 + >>> ser.dt.isocalendar.week + 0 53 + 1 + Name: week, dtype: Int64 """ return self._get_values().isocalendar.set_index(self._parent.index) From 9c303515b5c1c05351e6bc569a65876141d35e09 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Tue, 7 Apr 
2020 07:43:41 +0200 Subject: [PATCH 14/16] Always return an Int64 data frame --- pandas/core/arrays/datetimes.py | 7 ++++--- pandas/tests/series/test_datetime_values.py | 20 ++++++++------------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 2b8dc4b684185..9e0360ae48919 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1266,14 +1266,15 @@ def isocalendar(self): 1 1 2 1 3 1 - Name: week, dtype: int32 + Name: week, dtype: Int64 """ from pandas import DataFrame sarray = fields.build_isocalendar_sarray(self.asi8) - iso_calendar_df = DataFrame(sarray, columns=["year", "week", "day"]) + iso_calendar_df = DataFrame( + sarray, columns=["year", "week", "day"], dtype="Int64" + ) if self._hasnans: - iso_calendar_df = iso_calendar_df.astype("Int64") iso_calendar_df.iloc[self._isnan] = None return iso_calendar_df diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 690969494ca0c..97b2e91753c4d 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -669,21 +669,17 @@ def test_setitem_with_different_tz(self): tm.assert_series_equal(ser, expected) @pytest.mark.parametrize( - "input_series, expected_output, expected_type", + "input_series, expected_output", [ - [["2020-01-01"], [[2020, 1, 3]], "int32"], - [[pd.NaT], [[np.NaN, np.NaN, np.NaN]], "Int64"], - [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]], "int32"], - [ - ["2010-01-01", pd.NaT], - [[2009, 53, 5], [np.NaN, np.NaN, np.NaN]], - "Int64", - ], + [["2020-01-01"], [[2020, 1, 3]]], + [[pd.NaT], [[np.NaN, np.NaN, np.NaN]]], + [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]], + [["2010-01-01", pd.NaT], [[2009, 53, 5], [np.NaN, np.NaN, np.NaN]]], ], ) - def test_isocalendar(self, input_series, expected_output, expected_type): + def test_isocalendar(self, input_series, 
expected_output): result = pd.to_datetime(pd.Series(input_series)).dt.isocalendar expected_frame = pd.DataFrame( - expected_output, columns=["year", "week", "day"] - ).astype(expected_type) + expected_output, columns=["year", "week", "day"], dtype="Int64" + ) tm.assert_frame_equal(result, expected_frame) From f40d2f0aea40f713faccc69f25a8aacd7b7bb9af Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Wed, 8 Apr 2020 18:20:22 +0200 Subject: [PATCH 15/16] Return UInt32 per calendar ops standard --- pandas/_libs/tslibs/fields.pyx | 12 ++++++------ pandas/core/arrays/datetimes.py | 4 ++-- pandas/core/indexes/accessors.py | 2 +- pandas/tests/series/test_datetime_values.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index ba122eac8d3e4..184d368659714 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -8,7 +8,7 @@ from cython import Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport ndarray, int64_t, int32_t, int8_t +from numpy cimport ndarray, int64_t, int32_t, int8_t, uint32_t cnp.import_array() from pandas._libs.tslibs.ccalendar import ( @@ -682,13 +682,13 @@ def build_isocalendar_sarray(const int64_t[:] dtindex): cdef: Py_ssize_t i, count = len(dtindex) npy_datetimestruct dts - ndarray[int32_t] iso_years, iso_weeks, days + ndarray[uint32_t] iso_years, iso_weeks, days iso_calendar_t ret_val sa_dtype = [ - ("year", "i4"), - ("week", "i4"), - ("day", "i4"), + ("year", "u4"), + ("week", "u4"), + ("day", "u4"), ] out = np.empty(count, dtype=sa_dtype) @@ -700,7 +700,7 @@ def build_isocalendar_sarray(const int64_t[:] dtindex): with nogil: for i in range(count): if dtindex[i] == NPY_NAT: - ret_val = -1, -1, -1 + ret_val = 0, 0, 0 else: dt64_to_dtstruct(dtindex[i], &dts) ret_val = get_iso_calendar(dts.year, dts.month, dts.day) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 
9e0360ae48919..d6af11a442518 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1266,13 +1266,13 @@ def isocalendar(self): 1 1 2 1 3 1 - Name: week, dtype: Int64 + Name: week, dtype: UInt32 """ from pandas import DataFrame sarray = fields.build_isocalendar_sarray(self.asi8) iso_calendar_df = DataFrame( - sarray, columns=["year", "week", "day"], dtype="Int64" + sarray, columns=["year", "week", "day"], dtype="UInt32" ) if self._hasnans: iso_calendar_df.iloc[self._isnan] = None diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 33f18bd9e17cd..d44fed9e097e7 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -247,7 +247,7 @@ def isocalendar(self): >>> ser.dt.isocalendar.week 0 53 1 - Name: week, dtype: Int64 + Name: week, dtype: UInt32 """ return self._get_values().isocalendar.set_index(self._parent.index) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 97b2e91753c4d..515e75b82371a 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -680,6 +680,6 @@ def test_setitem_with_different_tz(self): def test_isocalendar(self, input_series, expected_output): result = pd.to_datetime(pd.Series(input_series)).dt.isocalendar expected_frame = pd.DataFrame( - expected_output, columns=["year", "week", "day"], dtype="Int64" + expected_output, columns=["year", "week", "day"], dtype="UInt32" ) tm.assert_frame_equal(result, expected_frame) From aa6a193b0fdd39b639998918de73f88868245916 Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Wed, 8 Apr 2020 20:42:33 +0200 Subject: [PATCH 16/16] Add timeseries documentation --- doc/source/user_guide/timeseries.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 0d49a2d8db77c..a09a5576ca378 100644 --- 
a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -772,6 +772,7 @@ There are several time/date properties that one can access from ``Timestamp`` or week,"The week ordinal of the year" dayofweek,"The number of the day of the week with Monday=0, Sunday=6" weekday,"The number of the day of the week with Monday=0, Sunday=6" + isocalendar,"The ISO 8601 year, week and day of the date" quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." days_in_month,"The number of days in the month of the datetime" is_month_start,"Logical indicating if first day of month (defined by frequency)" @@ -786,6 +787,15 @@ Furthermore, if you have a ``Series`` with datetimelike values, then you can access these properties via the ``.dt`` accessor, as detailed in the section on :ref:`.dt accessors`. +.. versionadded:: 1.1.0 + +You may obtain the year, week and day components of a date according to the ISO 8601 calendar: + +.. ipython:: python + + idx = pd.date_range(start='2019-12-29', freq='D', periods=4) + idx.to_series().dt.isocalendar + .. _timeseries.offsets: DateOffset objects