From 1ace7537aca8d6d8f3d3759c3f9c02f67c3bc7ff Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Mar 2022 09:22:59 -0700 Subject: [PATCH 1/3] CLN: assorted --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/_libs/hashing.pyx | 2 +- pandas/_libs/hashtable_class_helper.pxi.in | 16 ++++++------ pandas/_libs/hashtable_func_helper.pxi.in | 6 +++-- pandas/_libs/interval.pyx | 4 +-- pandas/_libs/lib.pyx | 8 +++--- pandas/_libs/ops.pyx | 4 +-- pandas/_libs/parsers.pyx | 2 +- pandas/_libs/tslib.pyx | 5 ++-- pandas/_libs/tslibs/conversion.pyx | 4 +-- pandas/_libs/tslibs/dtypes.pyx | 2 +- pandas/_libs/tslibs/fields.pyi | 4 +-- pandas/_libs/tslibs/fields.pyx | 10 +++---- pandas/_libs/tslibs/nattype.pxd | 1 - pandas/_libs/tslibs/nattype.pyx | 26 +++++-------------- pandas/_libs/tslibs/np_datetime.pxd | 4 +++ pandas/_libs/tslibs/np_datetime.pyx | 11 ++------ pandas/_libs/tslibs/offsets.pyx | 18 ++++++------- pandas/_libs/tslibs/parsing.pyx | 23 ++++++++-------- pandas/_libs/tslibs/period.pyx | 12 ++++----- pandas/_libs/tslibs/strptime.pyx | 4 +-- pandas/_libs/tslibs/timedeltas.pxd | 1 - pandas/_libs/tslibs/timedeltas.pyx | 5 ++-- pandas/_libs/tslibs/timestamps.pyx | 4 +-- pandas/_libs/tslibs/tzconversion.pyx | 4 +-- pandas/_libs/tslibs/util.pxd | 1 - pandas/_libs/tslibs/vectorized.pyx | 8 +++--- pandas/core/indexes/datetimelike.py | 12 +++------ .../tests/groupby/transform/test_transform.py | 1 + 29 files changed, 91 insertions(+), 112 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c4a760efd9a40..a373870f268ae 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -466,6 +466,7 @@ Groupby/resample/rolling - Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) - Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) +- Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) - Reshaping diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 39caf04ddf2f8..9c44e3f4ccf4a 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -53,7 +53,7 @@ def hash_object_array( """ cdef: Py_ssize_t i, n - uint64_t[:] result + uint64_t[::1] result bytes data, k uint8_t *kb uint64_t *lens diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 6ddf8d42b9baa..8a2b9c2f77627 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -503,7 +503,7 @@ cdef class {{name}}HashTable(HashTable): int ret = 0 {{c_type}} val khiter_t k - intp_t[:] locs = np.empty(n, dtype=np.intp) + intp_t[::1] locs = np.empty(n, dtype=np.intp) with nogil: for i in range(n): @@ -561,7 +561,7 @@ cdef class {{name}}HashTable(HashTable): """ cdef: Py_ssize_t i, idx, count = count_prior, n = len(values) - intp_t[:] labels + intp_t[::1] labels int ret = 0 {{c_type}} val, na_value2 khiter_t k @@ -710,7 +710,7 @@ cdef class {{name}}HashTable(HashTable): # tuple[np.ndarray[np.intp], np.ndarray[{{dtype}}]] cdef: Py_ssize_t i, n = len(values) - intp_t[:] labels + intp_t[::1] labels Py_ssize_t idx, count = 0 int ret = 0 {{c_type}} val @@ -848,7 +848,7 @@ cdef class StringHashTable(HashTable): object val const char *v 
khiter_t k - intp_t[:] locs = np.empty(n, dtype=np.intp) + intp_t[::1] locs = np.empty(n, dtype=np.intp) # these by-definition *must* be strings vecs = malloc(n * sizeof(char *)) @@ -946,8 +946,8 @@ cdef class StringHashTable(HashTable): """ cdef: Py_ssize_t i, idx, count = count_prior, n = len(values) - intp_t[:] labels - int64_t[:] uindexer + intp_t[::1] labels + int64_t[::1] uindexer int ret = 0 object val const char *v @@ -1168,7 +1168,7 @@ cdef class PyObjectHashTable(HashTable): int ret = 0 object val khiter_t k - intp_t[:] locs = np.empty(n, dtype=np.intp) + intp_t[::1] locs = np.empty(n, dtype=np.intp) for i in range(n): val = values[i] @@ -1223,7 +1223,7 @@ cdef class PyObjectHashTable(HashTable): """ cdef: Py_ssize_t i, idx, count = count_prior, n = len(values) - intp_t[:] labels + intp_t[::1] labels int ret = 0 object val khiter_t k diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 6b1fe25154e30..11a45bb194c03 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -85,7 +85,9 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna): {{endif}} # collect counts in the order corresponding to result_keys: - cdef int64_t[:] result_counts = np.empty(table.size, dtype=np.int64) + cdef: + int64_t[::1] result_counts = np.empty(table.size, dtype=np.int64) + for i in range(table.size): {{if dtype == 'object'}} k = kh_get_{{ttype}}(table, result_keys.data[i]) @@ -366,7 +368,7 @@ def mode(ndarray[htfunc_t] values, bint dropna): ndarray[htfunc_t] keys ndarray[htfunc_t] modes - int64_t[:] counts + int64_t[::1] counts int64_t count, max_count = -1 Py_ssize_t nkeys, k, j = 0 diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index aba635e19995a..4c8419b78e2b8 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -5,11 +5,11 @@ from operator import ( ) from cpython.datetime cimport ( - PyDateTime_IMPORT, PyDelta_Check, + import_datetime, ) -PyDateTime_IMPORT +import_datetime() from cpython.object cimport ( Py_EQ, diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c86eb80da93f7..18a58902075f2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -9,9 +9,9 @@ from cython import Py_ssize_t from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, - PyDateTime_IMPORT, PyDelta_Check, PyTime_Check, + import_datetime, ) from cpython.iterator cimport PyIter_Check from cpython.number cimport PyNumber_Check @@ -27,7 +27,7 @@ from cpython.tuple cimport ( ) from cython cimport floating -PyDateTime_IMPORT +import_datetime() import numpy as np @@ -2470,8 +2470,8 @@ def maybe_convert_objects(ndarray[object] objects, ndarray[int64_t] ints ndarray[uint64_t] uints ndarray[uint8_t] bools - int64_t[:] idatetimes - int64_t[:] itimedeltas + int64_t[::1] idatetimes + int64_t[::1] itimedeltas Seen seen = Seen() object val float64_t fval, fnan = np.nan diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index ac8a7f2cc57f7..df6a17cc4cc5e 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -194,7 +194,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray: """ cdef: Py_ssize_t i, n = len(values) - object[:] result + object[::1] result object x result = np.empty(n, dtype=object) @@ -231,7 +231,7 @@ def vec_binop(object[:] left, object[:] right, object op) -> ndarray: """ cdef: Py_ssize_t i, n = len(left) - object[:] result + object[::1] result if n != len(right): raise ValueError(f'Arrays were different 
lengths: {n} vs {len(right)}') diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 59a86751964e6..6d96b5c0c9f03 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1457,7 +1457,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col, const char *word = NULL int64_t NA = -1 - int64_t[:] codes + int64_t[::1] codes int64_t current_category = 0 char *errors = "strict" diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 063b95953bd91..9dfc438319148 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -5,13 +5,13 @@ import cython from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, - PyDateTime_IMPORT, datetime, + import_datetime, tzinfo, ) # import datetime C API -PyDateTime_IMPORT +import_datetime() cimport numpy as cnp @@ -63,7 +63,6 @@ from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.tslibs.timestamps import Timestamp # Note: this is the only non-tslibs intra-pandas dependency here - from pandas._libs.missing cimport checknull_with_nat_and_na from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7dce3cad9d339..9c5f8b18b38bc 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -18,13 +18,13 @@ import pytz from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, - PyDateTime_IMPORT, datetime, + import_datetime, time, tzinfo, ) -PyDateTime_IMPORT +import_datetime() from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.np_datetime cimport ( diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 43f33b98b8eaa..02b78cfa530c9 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -165,7 +165,7 @@ class FreqGroup(Enum): FR_MS = c_FreqGroup.FR_MS FR_US = c_FreqGroup.FR_US FR_NS = c_FreqGroup.FR_NS - FR_UND = -c_FreqGroup.FR_UND # undefined + FR_UND = c_FreqGroup.FR_UND # undefined @staticmethod def from_period_dtype_code(code: int) -> "FreqGroup": diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index 415b4329310c0..571a327b46df8 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -22,7 +22,7 @@ def get_date_field( field: str, ) -> npt.NDArray[np.int32]: ... def get_timedelta_field( - tdindex: np.ndarray, # const int64_t[:] + tdindex: npt.NDArray[np.int64], # const int64_t[:] field: str, ) -> npt.NDArray[np.int32]: ... def isleapyear_arr( @@ -31,7 +31,7 @@ def isleapyear_arr( def build_isocalendar_sarray( dtindex: npt.NDArray[np.int64], # const int64_t[:] ) -> np.ndarray: ... -def get_locale_names(name_type: str, locale: str | None = ...): ... +def _get_locale_names(name_type: str, locale: str | None = ...): ... 
class RoundTo: @property diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index c1915e719f515..6d8b27e0ad75a 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -152,7 +152,7 @@ def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None) if locale is None: names = np.array(DAYS_FULL, dtype=np.object_) else: - names = np.array(get_locale_names('f_weekday', locale), + names = np.array(_get_locale_names('f_weekday', locale), dtype=np.object_) for i in range(count): if dtindex[i] == NPY_NAT: @@ -167,7 +167,7 @@ def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None) if locale is None: names = np.array(MONTHS_FULL, dtype=np.object_) else: - names = np.array(get_locale_names('f_month', locale), + names = np.array(_get_locale_names('f_month', locale), dtype=np.object_) for i in range(count): if dtindex[i] == NPY_NAT: @@ -574,7 +574,7 @@ def build_isocalendar_sarray(const int64_t[:] dtindex): return out -def get_locale_names(name_type: str, locale: object = None): +def _get_locale_names(name_type: str, locale: object = None): """ Returns an array of localized day or month names. @@ -650,7 +650,7 @@ class RoundTo: return 4 -cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit): +cdef inline ndarray[int64_t] _floor_int64(const int64_t[:] values, int64_t unit): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] result = np.empty(n, dtype="i8") @@ -668,7 +668,7 @@ cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit): return result -cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit): +cdef inline ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] result = np.empty(n, dtype="i8") diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd index 5e5f4224f902f..e878fa7629f25 100644 --- a/pandas/_libs/tslibs/nattype.pxd +++ b/pandas/_libs/tslibs/nattype.pxd @@ -4,7 +4,6 @@ from numpy cimport int64_t cdef int64_t NPY_NAT -cdef bint _nat_scalar_rules[6] cdef set c_nat_strings cdef class _NaT(datetime): diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index e6a70177463b8..8ddbbfba49614 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -3,11 +3,13 @@ import warnings from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, - PyDateTime_IMPORT, PyDelta_Check, datetime, + import_datetime, timedelta, ) + +import_datetime() from cpython.object cimport ( Py_EQ, Py_GE, @@ -18,10 +20,6 @@ from cpython.object cimport ( PyObject_RichCompare, ) -PyDateTime_IMPORT - -from cpython.version cimport PY_MINOR_VERSION - import numpy as np cimport numpy as cnp @@ -43,14 +41,6 @@ cdef set c_nat_strings = nat_strings cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT # python-visible constant -cdef bint _nat_scalar_rules[6] -_nat_scalar_rules[Py_EQ] = False -_nat_scalar_rules[Py_NE] = True -_nat_scalar_rules[Py_LT] = False -_nat_scalar_rules[Py_LE] = False -_nat_scalar_rules[Py_GT] = False -_nat_scalar_rules[Py_GE] = False - # ---------------------------------------------------------------------- @@ -107,7 +97,6 @@ def __nat_unpickle(*args): cdef class _NaT(datetime): # cdef readonly: # int64_t value - # object freq # higher than np.ndarray and np.matrix __array_priority__ = 100 @@ -115,16 +104,16 @@ cdef class _NaT(datetime): def __richcmp__(_NaT self, object other, int op): if 
util.is_datetime64_object(other) or PyDateTime_Check(other): # We treat NaT as datetime-like for this comparison - return _nat_scalar_rules[op] + return op == Py_NE elif util.is_timedelta64_object(other) or PyDelta_Check(other): # We treat NaT as timedelta-like for this comparison - return _nat_scalar_rules[op] + return op == Py_NE elif util.is_array(other): if other.dtype.kind in "mM": result = np.empty(other.shape, dtype=np.bool_) - result.fill(_nat_scalar_rules[op]) + result.fill(op == Py_NE) elif other.dtype.kind == "O": result = np.array([PyObject_RichCompare(self, x, op) for x in other]) elif op == Py_EQ: @@ -510,8 +499,7 @@ class NaTType(_NaT): utcoffset = _make_error_func("utcoffset", datetime) # "fromisocalendar" was introduced in 3.8 - if PY_MINOR_VERSION >= 8: - fromisocalendar = _make_error_func("fromisocalendar", datetime) + fromisocalendar = _make_error_func("fromisocalendar", datetime) # ---------------------------------------------------------------------- # The remaining methods have docstrings copy/pasted from the analogous diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index c2bbc4fe764fe..d2b85369ecc57 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -8,6 +8,7 @@ from numpy cimport ( ) +# TODO(cython3): most of these can be cimported directly from numpy cdef extern from "numpy/ndarrayobject.h": ctypedef int64_t npy_timedelta ctypedef int64_t npy_datetime @@ -59,6 +60,9 @@ cdef extern from "src/datetime/np_datetime.h": NPY_DATETIMEUNIT fr, npy_datetimestruct *result) nogil + npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, + npy_datetimestruct *d) nogil + cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 79a58478d630a..b0b431e5bb3cd 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -6,7 +6,7 @@ from cpython.datetime cimport ( PyDateTime_GET_DAY, PyDateTime_GET_MONTH, PyDateTime_GET_YEAR, - PyDateTime_IMPORT, + import_datetime, ) from cpython.object cimport ( Py_EQ, @@ -17,7 +17,7 @@ from cpython.object cimport ( Py_NE, ) -PyDateTime_IMPORT +import_datetime() from numpy cimport int64_t @@ -28,13 +28,6 @@ cdef extern from "src/datetime/np_datetime.h": int cmp_npy_datetimestruct(npy_datetimestruct *a, npy_datetimestruct *b) - npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, - npy_datetimestruct *d) nogil - - void pandas_datetime_to_datetimestruct(npy_datetime val, - NPY_DATETIMEUNIT fr, - npy_datetimestruct *result) nogil - void pandas_timedelta_to_timedeltastruct(npy_timedelta val, NPY_DATETIMEUNIT fr, pandas_timedeltastruct *result diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index f19d34d99c814..fef98199d3dbc 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -8,15 +8,15 @@ import cython from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, - PyDateTime_IMPORT, PyDelta_Check, date, datetime, + import_datetime, time as dt_time, timedelta, ) -PyDateTime_IMPORT +import_datetime() from dateutil.easter import easter from dateutil.relativedelta import relativedelta @@ -2392,7 +2392,7 @@ cdef class SemiMonthOffset(SingleConstructorOffset): int64_t[:] i8other = dtarr.view("i8") Py_ssize_t i, count = len(i8other) int64_t val - int64_t[:] out = np.empty(count, dtype="i8") + int64_t[::1] out = np.empty(count, dtype="i8") npy_datetimestruct 
dts int months, to_day, nadj, n = self.n int days_in_month, day, anchor_dom = self.day_of_month @@ -2577,7 +2577,7 @@ cdef class Week(SingleConstructorOffset): cdef: Py_ssize_t i, count = len(i8other) int64_t val - int64_t[:] out = np.empty(count, dtype="i8") + int64_t[::1] out = np.empty(count, dtype="i8") npy_datetimestruct dts int wday, days, weeks, n = self.n int anchor_weekday = self.weekday @@ -3774,7 +3774,7 @@ cdef shift_quarters( """ cdef: Py_ssize_t count = len(dtindex) - int64_t[:] out = np.empty(count, dtype="int64") + int64_t[::1] out = np.empty(count, dtype="int64") if day_opt not in ["start", "end", "business_start", "business_end"]: raise ValueError("day must be None, 'start', 'end', " @@ -3800,7 +3800,7 @@ def shift_months(const int64_t[:] dtindex, int months, object day_opt=None): Py_ssize_t i npy_datetimestruct dts int count = len(dtindex) - int64_t[:] out = np.empty(count, dtype="int64") + int64_t[::1] out = np.empty(count, dtype="int64") if day_opt is None: with nogil: @@ -3827,7 +3827,7 @@ def shift_months(const int64_t[:] dtindex, int months, object day_opt=None): @cython.wraparound(False) @cython.boundscheck(False) cdef inline void _shift_months(const int64_t[:] dtindex, - int64_t[:] out, + int64_t[::1] out, Py_ssize_t count, int months, str day_opt) nogil: @@ -3859,7 +3859,7 @@ cdef inline void _shift_months(const int64_t[:] dtindex, @cython.wraparound(False) @cython.boundscheck(False) cdef inline void _shift_quarters(const int64_t[:] dtindex, - int64_t[:] out, + int64_t[::1] out, Py_ssize_t count, int quarters, int q1start_month, @@ -3906,7 +3906,7 @@ cdef ndarray[int64_t] _shift_bdays(const int64_t[:] i8other, int periods): """ cdef: Py_ssize_t i, n = len(i8other) - int64_t[:] result = np.empty(n, dtype="i8") + int64_t[::1] result = np.empty(n, dtype="i8") int64_t val, res int wday, nadj, days npy_datetimestruct dts diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 942a07760b86e..53f2dd87c20f7 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -17,7 +17,6 @@ from cpython.datetime cimport ( tzinfo, ) from cpython.object cimport PyObject_Str -from cpython.version cimport PY_VERSION_HEX import_datetime() @@ -196,11 +195,9 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): ), stacklevel=4, ) - if PY_VERSION_HEX >= 0x03060100: - # In Python <= 3.6.0 there is no range checking for invalid dates - # in C api, thus we call faster C version for 3.6.1 or newer - return datetime_new(year, month, day, 0, 0, 0, 0, None), reso - return datetime(year, month, day, 0, 0, 0, 0, None), reso + # In Python <= 3.6.0 there is no range checking for invalid dates + # in C api, thus we call faster C version for 3.6.1 or newer + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso raise DateParseError(f"Invalid date specified ({month}/{day})") @@ -573,7 +570,9 @@ cdef dateutil_parse( """ lifted from dateutil to get resolution""" cdef: - object res, attr, ret, tzdata + str attr + datetime ret + object res, tzdata object reso = None dict repl = {} @@ -637,7 +636,7 @@ def try_parse_dates( ) -> np.ndarray: cdef: Py_ssize_t i, n - object[:] result + object[::1] result n = len(values) result = np.empty(n, dtype='O') @@ -681,7 +680,7 @@ def try_parse_date_and_time( ) -> np.ndarray: cdef: Py_ssize_t i, n - object[:] result + object[::1] result n = len(dates) # TODO(cython3): Use len instead of `shape[0]` @@ -719,7 +718,7 @@ def try_parse_year_month_day( ) -> np.ndarray: cdef: Py_ssize_t i, n - 
object[:] result + object[::1] result n = len(years) # TODO(cython3): Use len instead of `shape[0]` @@ -742,7 +741,7 @@ def try_parse_datetime_components(object[:] years, cdef: Py_ssize_t i, n - object[:] result + object[::1] result int secs double float_secs double micros @@ -1095,7 +1094,7 @@ def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndar object[::1] iters_view flatiter it cnp.ndarray[object] result - object[:] result_view + object[::1] result_view if col_count == 0: return np.zeros(0, dtype=object) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 182502ba9ad7f..986bbd8c8f856 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -36,13 +36,13 @@ import cython from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, - PyDateTime_IMPORT, PyDelta_Check, datetime, + import_datetime, ) # import datetime C API -PyDateTime_IMPORT +import_datetime() from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, @@ -62,7 +62,6 @@ cdef extern from "src/datetime/np_datetime.h": cimport pandas._libs.tslibs.util as util -from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.ccalendar cimport ( @@ -104,7 +103,6 @@ from pandas._libs.tslibs.parsing import parse_time_string from pandas._libs.tslibs.nattype cimport ( NPY_NAT, - _nat_scalar_rules, c_NaT as NaT, c_nat_strings as nat_strings, checknull_with_nat, @@ -1673,7 +1671,7 @@ cdef class _Period(PeriodMixin): self._require_matching_freq(other) return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) elif other is NaT: - return _nat_scalar_rules[op] + return op == Py_NE elif util.is_array(other): # GH#44285 if cnp.PyArray_IsZeroDim(other): @@ -1828,10 +1826,10 @@ cdef class _Period(PeriodMixin): if end: if freq == "B" or self.freq == "B": # roll forward to ensure we land on B date - adjust = Timedelta(1, "D") - Timedelta(1, "ns") + adjust = np.timedelta64(1, "D") - np.timedelta64(1, "ns") return self.to_timestamp(how="start") + adjust endpoint = (self + self.freq).to_timestamp(how='start') - return endpoint - Timedelta(1, 'ns') + return endpoint - np.timedelta64(1, "ns") if freq is None: freq = self._dtype._get_to_timestamp_base() diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index d214694fb659d..bc3e68671b7ec 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -72,8 +72,8 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors=' cdef: Py_ssize_t i, n = len(values) npy_datetimestruct dts - int64_t[:] iresult - object[:] result_timezone + int64_t[::1] iresult + object[::1] result_timezone int year, month, day, minute, hour, second, weekday, julian int week_of_year, week_of_year_start, parse_code, ordinal int iso_week, iso_year diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index fed1f2d326819..0da4b614dd379 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -11,7 +11,6 @@ cdef bint is_any_td_scalar(object obj) cdef class _Timedelta(timedelta): cdef readonly: int64_t value # nanoseconds - object freq # frequency reference bint is_populated # are my components populated int64_t _d, _h, _m, _s, _ms, _us, _ns diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 6b7ebf96633b3..479e6c9bb7efd 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ 
b/pandas/_libs/tslibs/timedeltas.pyx @@ -22,12 +22,12 @@ cnp.import_array() from cpython.datetime cimport ( PyDateTime_Check, - PyDateTime_IMPORT, PyDelta_Check, + import_datetime, timedelta, ) -PyDateTime_IMPORT +import_datetime() cimport pandas._libs.tslibs.util as util @@ -826,7 +826,6 @@ cdef _to_py_int_float(v): cdef class _Timedelta(timedelta): # cdef readonly: # int64_t value # nanoseconds - # object freq # frequency reference # bint is_populated # are my components populated # int64_t _d, _h, _m, _s, _ms, _us, _ns diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index aad49bd70b120..2afceb827e49a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -25,10 +25,10 @@ cnp.import_array() from cpython.datetime cimport ( # alias bc `tzinfo` is a kwarg below PyDate_Check, PyDateTime_Check, - PyDateTime_IMPORT, PyDelta_Check, PyTZInfo_Check, datetime, + import_datetime, time, tzinfo as tzinfo_type, ) @@ -43,7 +43,7 @@ from cpython.object cimport ( PyObject_RichCompareBool, ) -PyDateTime_IMPORT +import_datetime() from pandas._libs.tslibs cimport ccalendar from pandas._libs.tslibs.base cimport ABCTimestamp diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 1a1aa6dfec5a0..7efe9412e43b9 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -5,14 +5,14 @@ import cython from cython import Py_ssize_t from cpython.datetime cimport ( - PyDateTime_IMPORT, PyDelta_Check, datetime, + import_datetime, timedelta, tzinfo, ) -PyDateTime_IMPORT +import_datetime() from dateutil.tz import tzutc import numpy as np diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 150516aadffc6..492b7d519551f 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -14,7 +14,6 @@ cdef extern from *: cdef extern from "Python.h": # Note: importing extern-style allows us to declare these as nogil # functions, whereas `from cpython cimport` does not. 
- bint PyUnicode_Check(object obj) nogil bint PyBool_Check(object obj) nogil bint PyFloat_Check(object obj) nogil bint PyComplex_Check(object obj) nogil diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index ada6d7f6495bf..3f47a19563b61 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -89,7 +89,7 @@ def ints_to_pydatetime( int64_t* tdata = NULL intp_t pos npy_datetimestruct dts - object dt, new_tz + tzinfo new_tz str typ int64_t value, local_val, delta = NPY_NAT # dummy for delta ndarray[object] result = np.empty(n, dtype=object) @@ -190,6 +190,8 @@ cdef inline int _reso_stamp(npy_datetimestruct *dts): return RESO_DAY +@cython.wraparound(False) +@cython.boundscheck(False) def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: cdef: Py_ssize_t i, ntrans=-1, n = len(stamps) @@ -259,7 +261,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t """ cdef: Py_ssize_t i, ntrans =- 1, n = len(stamps) - int64_t[:] result = np.empty(n, dtype=np.int64) + int64_t[::1] result = np.empty(n, dtype=np.int64) ndarray[int64_t] trans int64_t[::1] deltas int64_t* tdata = NULL @@ -369,7 +371,7 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz): cdef: Py_ssize_t i, ntrans =- 1, n = len(stamps) - int64_t[:] result = np.empty(n, dtype=np.int64) + int64_t[::1] result = np.empty(n, dtype=np.int64) ndarray[int64_t] trans int64_t[::1] deltas int64_t* tdata = NULL diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 5c6fa8d771210..25b7a5c3d3689 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -92,16 +92,12 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex): freqstr: str | None _resolution_obj: Resolution - # error: "Callable[[Any], Any]" has no attribute "fget" - hasnans = cast( - bool, - cache_readonly( - DatetimeLikeArrayMixin._hasna.fget # type: ignore[attr-defined] - ), - ) - # ------------------------------------------------------------------------ + @cache_readonly + def hasnans(self) -> bool: + return self._data._hasna + def equals(self, other: Any) -> bool: """ Determines if two Index objects contain the same elements. 
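The datetimelike change just above replaces the ``_hasna.fget`` cast workaround with a plain cached property. A minimal sketch of the user-visible behavior it preserves, assuming a pandas build with this patch applied:

    import pandas as pd

    # hasnans reports whether the index contains NaT; the value comes from the
    # underlying array's NA check and is cached on first access.
    dti = pd.DatetimeIndex(["2022-03-18", None])
    print(dti.hasnans)                               # True
    print(pd.DatetimeIndex(["2022-03-18"]).hasnans)  # False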
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 2a660583f1396..0f450adf67b23 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1357,6 +1357,7 @@ def test_null_group_str_reducer(request, dropna, reduction_func): tm.assert_equal(result, expected) +@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_null_group_str_transformer( request, using_array_manager, dropna, transformation_func ): From 175cd09c44bf591fd4179b4462fc92b059f30169 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Mar 2022 10:35:44 -0700 Subject: [PATCH 2/3] remove Timedelta.freq from doc --- doc/redirects.csv | 1 - doc/source/reference/arrays.rst | 1 - 2 files changed, 2 deletions(-) diff --git a/doc/redirects.csv b/doc/redirects.csv index 9b8a5a73dedff..ead626ee029b1 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -1296,7 +1296,6 @@ generated/pandas.Timedelta.components,../reference/api/pandas.Timedelta.componen generated/pandas.Timedelta.days,../reference/api/pandas.Timedelta.days generated/pandas.Timedelta.delta,../reference/api/pandas.Timedelta.delta generated/pandas.Timedelta.floor,../reference/api/pandas.Timedelta.floor -generated/pandas.Timedelta.freq,../reference/api/pandas.Timedelta.freq generated/pandas.Timedelta,../reference/api/pandas.Timedelta generated/pandas.TimedeltaIndex.ceil,../reference/api/pandas.TimedeltaIndex.ceil generated/pandas.TimedeltaIndex.components,../reference/api/pandas.TimedeltaIndex.components diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 1b8e0fdb856b5..2fd4cb41df5d3 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -185,7 +185,6 @@ Properties Timedelta.components Timedelta.days Timedelta.delta - Timedelta.freq Timedelta.is_populated Timedelta.max Timedelta.microseconds From 59e7656e8367c308cfa655d9acd3ec17e0b8a824 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Mar 2022 18:30:42 -0700 Subject: [PATCH 3/3] revert Timedelta.freq removal --- doc/redirects.csv | 1 + doc/source/reference/arrays.rst | 1 + pandas/_libs/tslibs/timedeltas.pxd | 1 + pandas/_libs/tslibs/timedeltas.pyx | 1 + 4 files changed, 4 insertions(+) diff --git a/doc/redirects.csv b/doc/redirects.csv index ead626ee029b1..9b8a5a73dedff 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -1296,6 +1296,7 @@ generated/pandas.Timedelta.components,../reference/api/pandas.Timedelta.componen generated/pandas.Timedelta.days,../reference/api/pandas.Timedelta.days generated/pandas.Timedelta.delta,../reference/api/pandas.Timedelta.delta generated/pandas.Timedelta.floor,../reference/api/pandas.Timedelta.floor +generated/pandas.Timedelta.freq,../reference/api/pandas.Timedelta.freq generated/pandas.Timedelta,../reference/api/pandas.Timedelta generated/pandas.TimedeltaIndex.ceil,../reference/api/pandas.TimedeltaIndex.ceil generated/pandas.TimedeltaIndex.components,../reference/api/pandas.TimedeltaIndex.components diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 2fd4cb41df5d3..1b8e0fdb856b5 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -185,6 +185,7 @@ Properties Timedelta.components Timedelta.days Timedelta.delta + Timedelta.freq Timedelta.is_populated Timedelta.max Timedelta.microseconds diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 0da4b614dd379..fed1f2d326819 
100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -11,6 +11,7 @@ cdef bint is_any_td_scalar(object obj) cdef class _Timedelta(timedelta): cdef readonly: int64_t value # nanoseconds + object freq # frequency reference bint is_populated # are my components populated int64_t _d, _h, _m, _s, _ms, _us, _ns diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 479e6c9bb7efd..76a14a56e1cc2 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -826,6 +826,7 @@ cdef _to_py_int_float(v): cdef class _Timedelta(timedelta): # cdef readonly: # int64_t value # nanoseconds + # object freq # frequency reference # bint is_populated # are my components populated # int64_t _d, _h, _m, _s, _ms, _us, _ns
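Most of the Cython declaration changes in the first patch tighten memoryviews from ``int64_t[:]`` to ``int64_t[::1]``, i.e. they now require a C-contiguous buffer. A small sketch of why that is safe at the touched call sites, using plain NumPy to show the contiguity property being relied on (the variable names are illustrative only):

    import numpy as np

    # np.empty always hands back a C-contiguous buffer, so it satisfies a
    # [::1] (contiguous) memoryview parameter.
    out = np.empty(5, dtype=np.int64)
    print(out.flags["C_CONTIGUOUS"])      # True

    # A strided view is not contiguous and would be rejected by the stricter
    # [::1] signature, which is presumably why only freshly allocated result
    # buffers were changed while input parameters keep the looser [:] form.
    strided = np.arange(10, dtype=np.int64)[::2]
    print(strided.flags["C_CONTIGUOUS"])  # False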
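The nattype change in the first patch drops the ``_nat_scalar_rules`` lookup table in favor of a direct ``op == Py_NE`` check. The behavior that encodes is unchanged: NaT, like NaN, compares unequal to everything, including itself, and every other comparison is False. A quick illustration:

    import pandas as pd

    print(pd.NaT == pd.NaT)                      # False
    print(pd.NaT != pd.NaT)                      # True
    print(pd.NaT <= pd.Timestamp("2022-03-18"))  # False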
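The whatsnew entry added in the first patch documents ``GroupBy.max`` raising ``RuntimeError`` for empty groups with ``uint64`` data (GH 46408). The snippet below is a hedged illustration of that situation, not the issue's own reproducer: an all-NA group in a nullable ``UInt64`` column is one plausible way to end up with an empty group, and with the fix it is expected to yield ``<NA>`` instead of raising.

    import pandas as pd

    # Group "b" has no observed values once its NA is masked out, which is the
    # "empty group" case the whatsnew note refers to.
    df = pd.DataFrame(
        {
            "key": ["a", "a", "b"],
            "val": pd.array([1, 2, pd.NA], dtype="UInt64"),
        }
    )
    print(df.groupby("key")["val"].max())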