From a2a575baf36ba62e9de1f4b054666e9cc4986fa7 Mon Sep 17 00:00:00 2001 From: ram vikram singh Date: Tue, 1 Nov 2022 03:18:45 +0530 Subject: [PATCH 01/17] gh 94906 gh 94906 --- .pre-commit-config.yaml | 12 +++--- pandas/_libs/algos.pyx | 57 +++++++++++++++++++--------- pandas/_libs/groupby.pyx | 21 +++++----- pandas/_libs/internals.pyx | 3 +- pandas/_libs/join.pyx | 6 +-- pandas/_libs/lib.pyx | 32 ++++++++++------ pandas/_libs/testing.pyx | 6 ++- pandas/_libs/tslib.pyx | 11 ++++-- pandas/_libs/tslibs/dtypes.pyx | 3 +- pandas/_libs/tslibs/fields.pyx | 3 +- pandas/_libs/tslibs/nattype.pyx | 13 ++++--- pandas/_libs/tslibs/np_datetime.pyx | 9 +++-- pandas/_libs/tslibs/offsets.pyx | 15 +++++--- pandas/_libs/tslibs/parsing.pyx | 8 +++- pandas/_libs/tslibs/period.pyx | 11 +++--- pandas/_libs/tslibs/strptime.pyx | 1 - pandas/_libs/tslibs/timedeltas.pyx | 15 +++++--- pandas/_libs/tslibs/timestamps.pyx | 8 ++-- pandas/_libs/tslibs/tzconversion.pyx | 5 +-- pandas/_libs/tslibs/vectorized.pyx | 2 +- pandas/_libs/window/aggregations.pyx | 18 +++++---- pandas/errors/__init__.py | 4 +- pandas/io/sas/byteswap.pyx | 3 +- pandas/io/sas/sas.pyx | 10 +++-- 24 files changed, 167 insertions(+), 109 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ff7526b87521..1893f57fc09c1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,16 +18,16 @@ repos: pass_filenames: true require_serial: false - repo: https://github.com/python/black - rev: 22.8.0 + rev: 22.10.0 hooks: - id: black - repo: https://github.com/codespell-project/codespell - rev: v2.2.1 + rev: v2.2.2 hooks: - id: codespell types_or: [python, rst, markdown] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.1.8 + rev: v0.2.1 hooks: - id: cython-lint - repo: https://github.com/pre-commit/pre-commit-hooks @@ -60,7 +60,7 @@ repos: - flake8-bugbear==22.7.1 - pandas-dev-flaker==0.5.0 - repo: https://github.com/pycqa/pylint - rev: v2.15.3 + rev: v2.15.5 hooks: - id: pylint - repo: https://github.com/PyCQA/isort @@ -68,7 +68,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v2.38.2 + rev: v3.2.0 hooks: - id: pyupgrade args: [--py38-plus] @@ -83,7 +83,7 @@ repos: types: [text] # overwrite types: [rst] types_or: [python, rst] - repo: https://github.com/sphinx-contrib/sphinx-lint - rev: v0.6.1 + rev: v0.6.7 hooks: - id: sphinx-lint - repo: https://github.com/asottile/yesqa diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 96c47471aaf90..d764ddd515301 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -81,26 +81,48 @@ class Infinity: """ Provide a positive Infinity comparison method for ranking. """ - __lt__ = lambda self, other: False - __le__ = lambda self, other: isinstance(other, Infinity) - __eq__ = lambda self, other: isinstance(other, Infinity) - __ne__ = lambda self, other: not isinstance(other, Infinity) - __gt__ = lambda self, other: (not isinstance(other, Infinity) and - not missing.checknull(other)) - __ge__ = lambda self, other: not missing.checknull(other) + def __lt__(self, other): + return False + + def __le__(self, other): + return isinstance(other, Infinity) + + def __eq__(self, other): + return isinstance(other, Infinity) + + def __ne__(self, other): + return not isinstance(other, Infinity) + + def __gt__(self, other): + return (not isinstance(other, Infinity) and + not missing.checknull(other)) + + def __ge__(self, other): + return not missing.checknull(other) class NegInfinity: """ Provide a negative Infinity comparison method for ranking. """ - __lt__ = lambda self, other: (not isinstance(other, NegInfinity) and - not missing.checknull(other)) - __le__ = lambda self, other: not missing.checknull(other) - __eq__ = lambda self, other: isinstance(other, NegInfinity) - __ne__ = lambda self, other: not isinstance(other, NegInfinity) - __gt__ = lambda self, other: False - __ge__ = lambda self, other: isinstance(other, NegInfinity) + def __lt__(self, other): + return (not isinstance(other, NegInfinity) and + not missing.checknull(other)) + + def __le__(self, other): + return not missing.checknull(other) + + def __eq__(self, other): + return isinstance(other, NegInfinity) + + def __ne__(self, other): + return not isinstance(other, NegInfinity) + + def __gt__(self, other): + return False + + def __ge__(self, other): + return isinstance(other, NegInfinity) @cython.wraparound(False) @@ -321,7 +343,7 @@ def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t: @cython.cdivision(True) def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): cdef: - Py_ssize_t i, j, xi, yi, N, K + Py_ssize_t i, xi, yi, N, K bint minpv float64_t[:, ::1] result ndarray[uint8_t, ndim=2] mask @@ -377,7 +399,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): @cython.wraparound(False) def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray: cdef: - Py_ssize_t i, j, xi, yi, N, K + Py_ssize_t i, xi, yi, N, K ndarray[float64_t, ndim=2] result ndarray[float64_t, ndim=2] ranked_mat ndarray[float64_t, ndim=1] rankedx, rankedy @@ -746,7 +768,8 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike): n = len(arr) if n == 1: - if arr[0] != arr[0] or (numeric_object_t is int64_t and timelike and arr[0] == NPY_NAT): + if arr[0] != arr[0] or (numeric_object_t is int64_t and timelike and + arr[0] == NPY_NAT): # single value is NaN return False, False, True else: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index f798655e9d922..5b50aaf87cdc3 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -265,7 +265,7 @@ def group_cumprod( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, size + Py_ssize_t i, j, N, K, int64float_t val, na_val int64float_t[:, ::1] accum intp_t lab @@ -356,7 +356,7 @@ def group_cumsum( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, size + Py_ssize_t i, j, N, K, int64float_t val, y, t, na_val int64float_t[:, ::1] accum, compensation uint8_t[:, ::1] accum_mask @@ -441,7 +441,7 @@ def group_shift_indexer( int periods, ) -> None: cdef: - Py_ssize_t N, i, j, ii, lab + Py_ssize_t N, i, ii, lab int offset = 0, sign int64_t idxer, idxer_slot int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64) @@ -744,7 +744,7 @@ def group_sum( if uses_mask: isna_entry = mask[i, j] elif (sum_t is float32_t or sum_t is float64_t - or sum_t is complex64_t or sum_t is complex64_t): + or sum_t is complex64_t or sum_t is complex64_t): # avoid warnings because of equality comparison isna_entry = not val == val elif sum_t is int64_t and is_datetimelike and val == NPY_NAT: @@ -1042,10 +1042,10 @@ def group_ohlc( Only aggregates on axis=0 """ cdef: - Py_ssize_t i, j, N, K, lab + Py_ssize_t i, N, K, lab int64float_t val uint8_t[::1] first_element_set - bint isna_entry, uses_mask = not mask is None + bint isna_entry, uses_mask = is not mask is None assert min_count == -1, "'min_count' only used in sum and prod" @@ -1240,7 +1240,8 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: return False -cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike): +cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, + bint is_datetimelike): """ Find either the min or the max supported by numeric_object_t; 'val' is a placeholder to effectively make numeric_object_t an argument. @@ -1366,7 +1367,8 @@ def group_last( # set a placeholder value in out[i, j]. if uses_mask: result_mask[i, j] = True - elif numeric_object_t is float32_t or numeric_object_t is float64_t: + elif numeric_object_t is float32_t or numeric_object_t is + float64_t: out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in @@ -1486,7 +1488,8 @@ def group_nth( # it was initialized with np.empty. Also ensures # we can downcast out if appropriate. out[i, j] = 0 - elif numeric_object_t is float32_t or numeric_object_t is float64_t: + elif numeric_object_t is float32_t or numeric_object_t is + float64_t: out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 1a98633908a49..747f57e6ba4c0 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -133,7 +133,7 @@ cdef class BlockPlacement: @property def as_array(self) -> np.ndarray: cdef: - Py_ssize_t start, stop, end, _ + Py_ssize_t start, stop, _ if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) @@ -259,7 +259,6 @@ cdef class BlockPlacement: """ cdef: slice slc = self._ensure_has_slice() - slice new_slice ndarray[intp_t, ndim=1] new_placement if slc is not None and slc.step == 1: diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index e574aa10f6b57..1f2d717cab88c 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -275,7 +275,7 @@ def left_join_indexer_unique( cdef: Py_ssize_t i, j, nleft, nright ndarray[intp_t] indexer - numeric_object_t lval, rval + numeric_object_t, rval i = 0 j = 0 @@ -324,7 +324,7 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] is non-unique (if both were unique we'd use left_join_indexer_unique). """ cdef: - Py_ssize_t i, j, k, nright, nleft, count + Py_ssize_t i, j, nright, nleft, count numeric_object_t lval, rval ndarray[intp_t] lindexer, rindexer ndarray[numeric_object_t] result @@ -434,7 +434,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] Both left and right are monotonic increasing but not necessarily unique. """ cdef: - Py_ssize_t i, j, k, nright, nleft, count + Py_ssize_t i, j, nright, nleft, count numeric_object_t lval, rval ndarray[intp_t] lindexer, rindexer ndarray[numeric_object_t] result diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 188b531b2b469..1519255c622a9 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -621,6 +621,8 @@ ctypedef fused ndarr_object: # TODO: get rid of this in StringArray and modify # and go through ensure_string_array instead + + @cython.wraparound(False) @cython.boundscheck(False) def convert_nans_to_NA(ndarr_object arr) -> ndarray: @@ -765,9 +767,9 @@ def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, Int64 (datetime64) version of generic python version in ``groupby.py``. """ cdef: - Py_ssize_t lenidx, lenbin, i, j, bc, vc + Py_ssize_t lenidx, lenbin, i, j, bc, ndarray[int64_t, ndim=1] bins - int64_t l_bin, r_bin, nat_count + int64_t, r_bin, nat_count bint right_closed = closed == 'right' nat_count = 0 @@ -2212,14 +2214,19 @@ def maybe_convert_numeric( # Otherwise, iterate and do full inference. cdef: - int status, maybe_int + int maybe_int Py_ssize_t i, n = values.size Seen seen = Seen(coerce_numeric) - ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_FLOAT64, 0) - ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_COMPLEX128, 0) - ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_INT64, 0) - ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT64, 0) - ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT8, 0) + ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_FLOAT64, + 0) + ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, + cnp.NPY_COMPLEX128, 0) + ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_INT64, + 0) + ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT64, + 0) + ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT8, + 0) ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") float64_t fval bint allow_null_in_int = convert_to_masked_nullable @@ -2298,7 +2305,7 @@ def maybe_convert_numeric( seen.float_ = True else: try: - status = floatify(val, &fval, &maybe_int) + # status = floatify(val, &fval, &maybe_int) if fval in na_values: seen.saw_null() @@ -2437,7 +2444,7 @@ def maybe_convert_objects(ndarray[object] objects, int64_t[::1] itimedeltas Seen seen = Seen() object val - float64_t fval, fnan = np.nan + float64_t, fnan = np.nan n = len(objects) @@ -2917,7 +2924,7 @@ def to_object_array(rows: object, min_width: int = 0) -> ndarray: def tuples_to_object_array(ndarray[object] tuples): cdef: - Py_ssize_t i, j, n, k, tmp + Py_ssize_t i, j, n, k, ndarray[object, ndim=2] result tuple tup @@ -3045,7 +3052,8 @@ cpdef ndarray eq_NA_compat(ndarray[object] arr, object key): key is assumed to have `not isna(key)` """ cdef: - ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_BOOL, 0) + ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, + cnp.NPY_BOOL, 0) Py_ssize_t i object item diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 679cde9932a7a..20bb065ef0c49 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -161,13 +161,15 @@ cpdef assert_almost_equal(a, b, is_unequal = True diff += 1 if not first_diff: - first_diff = f"At positional index {i}, first diff: {a[i]} != {b[i]}" + first_diff = f"At positional index {i}, + first diff: {a[i]} != {b[i]}" if is_unequal: from pandas._testing import raise_assert_detail msg = (f"{obj} values are different " f"({np.round(diff * 100.0 / na, 5)} %)") - raise_assert_detail(obj, msg, lobj, robj, first_diff=first_diff, index_values=index_values) + raise_assert_detail(obj, msg, lobj, robj, first_diff=first_diff, + index_values=index_values) return True diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d7c0c91332e02..5c2504e7ef78a 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -260,7 +260,7 @@ def array_with_unit_to_datetime( tz : parsed timezone offset or None """ cdef: - Py_ssize_t i, j, n=len(values) + Py_ssize_t i, n=len(values) int64_t mult int prec = 0 ndarray[float64_t] fvalues @@ -417,6 +417,7 @@ def array_with_unit_to_datetime( return oresult, tz + @cython.wraparound(False) @cython.boundscheck(False) def first_non_null(values: ndarray) -> int: @@ -424,7 +425,6 @@ def first_non_null(values: ndarray) -> int: cdef: Py_ssize_t n = len(values) Py_ssize_t i - int result for i in range(n): val = values[i] if checknull_with_nat_and_na(val): @@ -435,6 +435,7 @@ def first_non_null(values: ndarray) -> int: else: return -1 + @cython.wraparound(False) @cython.boundscheck(False) cpdef array_to_datetime( @@ -609,7 +610,8 @@ cpdef array_to_datetime( continue elif is_raise: raise ValueError( - f"time data \"{val}\" at position {i} doesn't match format specified" + f"time data \"{val}\" at position {i} doesn't match + format specified" ) return values, tz_out @@ -625,7 +627,8 @@ cpdef array_to_datetime( if is_coerce: iresult[i] = NPY_NAT continue - raise TypeError(f"invalid string coercion to datetime for \"{val}\" at position {i}") + raise TypeError(f"invalid string coercion to datetime for \"{val}\" + at position {i}") if tz is not None: seen_datetime_offset = True diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 94781374296fa..0693a142ec4f0 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -396,7 +396,8 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil: # TODO: use in _matplotlib.converter? -cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1: +cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) + except? -1: """ How many of the given time units fit into a single day? """ diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 3c7406d231241..be39ec7334be1 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -325,7 +325,8 @@ def get_start_end_field( @cython.wraparound(False) @cython.boundscheck(False) -def get_date_field(const int64_t[:] dtindex, str field, NPY_DATETIMEUNIT reso=NPY_FR_ns): +def get_date_field(const int64_t[:] dtindex, str field, + NPY_DATETIMEUNIT reso=NPY_FR_ns): """ Given a int64-based datetime index, extract the year, month, etc., field and return an array of these values. diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 79299ec38e19c..e25703e870595 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -203,10 +203,11 @@ cdef class _NaT(datetime): result.fill("NaT") return result - # __rsub__ logic here - # TODO(cython3): remove this, move above code out of ``if not is_rsub`` block - # timedelta64 - NaT we have to treat NaT as timedelta64 - # for this to be meaningful, and the result is timedelta64 + """__rsub__ logic here + TODO(cython3): remove this, move above code out of + ``if not is_rsub`` block + timedelta64 - NaT we have to treat NaT as timedelta64 + for this to be meaningful, and the result is timedelta64""" result = np.empty(other.shape, dtype="timedelta64[ns]") result.fill("NaT") return result @@ -240,7 +241,8 @@ cdef class _NaT(datetime): result = np.empty(other.shape, dtype="timedelta64[ns]") result.fill("NaT") return result - # other cases are same, swap operands is allowed even though we subtract because this is NaT + """" other cases are same, swap operands is allowed even though we subtract + because this is NaT""" return self.__sub__(other) def __pos__(self): @@ -1201,6 +1203,7 @@ default 'raise' NaT """, ) + @property def tz(self) -> None: return None diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 07872050dc822..bf5cdd4a0de68 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -46,7 +46,7 @@ cdef extern from "src/datetime/np_datetime.h": npy_datetimestruct _S_MIN_DTS, _S_MAX_DTS npy_datetimestruct _M_MIN_DTS, _M_MAX_DTS - PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype); + PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype) cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(const char *str, int len, int want_exc, @@ -171,7 +171,8 @@ class OutOfBoundsTimedelta(ValueError): pass -cdef get_implementation_bounds(NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper): +cdef get_implementation_bounds(NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, + npy_datetimestruct *upper): if reso == NPY_FR_ns: upper[0] = _NS_MAX_DTS lower[0] = _NS_MIN_DTS @@ -420,7 +421,6 @@ def compare_mismatched_resolutions(ndarray left, ndarray right, op): Py_ssize_t i, N = left.size npy_datetimestruct ldts, rdts - for i in range(N): # Analogous to: lval = lvalues[i] lval = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] @@ -511,7 +511,8 @@ cdef ndarray astype_round_check( @cython.overflowcheck(True) -cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1: +cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) + except? -1: """ Find the factor by which we need to multiply to convert from from_unit to to_unit. """ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 37b87f92971cc..50f0b4efcba42 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2268,7 +2268,8 @@ cdef class QuarterOffset(SingleConstructorOffset): def _apply_array(self, dtarr): reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_quarters( - dtarr.view("i8"), self.n, self.startingMonth, self._day_opt, modby=3, reso=reso + dtarr.view("i8"), self.n, self.startingMonth, self._day_opt, modby=3, + reso=reso ) return shifted @@ -2548,7 +2549,8 @@ cdef class SemiMonthOffset(SingleConstructorOffset): ndarray i8other = dtarr.view("i8") Py_ssize_t i, count = dtarr.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, + 0) npy_datetimestruct dts int months, to_day, nadj, n = self.n int days_in_month, day, anchor_dom = self.day_of_month @@ -2756,7 +2758,8 @@ cdef class Week(SingleConstructorOffset): cdef: Py_ssize_t i, count = i8other.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, + 0) npy_datetimestruct dts int wday, days, weeks, n = self.n int anchor_weekday = self.weekday @@ -3328,7 +3331,8 @@ cdef class FY5253Quarter(FY5253Mixin): for qlen in qtr_lens: if qlen * 7 <= tdelta.days: num_qtrs += 1 - tdelta -= (<_Timedelta>Timedelta(days=qlen * 7))._as_creso(norm._creso) + tdelta -= (<_Timedelta>Timedelta(days=qlen * 7))._as_creso( + norm._creso) else: break else: @@ -4145,7 +4149,8 @@ cdef ndarray _shift_bdays( """ cdef: Py_ssize_t i, n = i8other.size - ndarray result = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + ndarray result = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, + 0) int64_t val, res_val int wday, nadj, days npy_datetimestruct dts diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 1312124cfb77b..966a22e8d85bf 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -418,7 +418,8 @@ cdef parse_datetime_string_with_reso( from pandas import Timestamp parsed = Timestamp(date_string) else: - parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us) + parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, + dts.us) reso = { NPY_DATETIMEUNIT.NPY_FR_Y: "year", NPY_DATETIMEUNIT.NPY_FR_M: "month", @@ -717,7 +718,8 @@ def try_parse_dates( date = datetime.now() default = datetime(date.year, date.month, 1) - parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + def parse_date(x): + return du_parse(x, dayfirst=dayfirst, default=default) # EAFP here try: @@ -1050,6 +1052,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: else: return None + cdef str _fill_token(token: str, padding: int): cdef str token_filled if '.' not in token: @@ -1064,6 +1067,7 @@ cdef str _fill_token(token: str, padding: int): token_filled = f'{seconds}.{nanoseconds}' return token_filled + @cython.wraparound(False) @cython.boundscheck(False) cdef inline object convert_to_unicode(object item, bint keep_trivial_numbers): diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index be6f87791284e..e709256d81494 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1053,7 +1053,8 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): cdef: Py_ssize_t n = len(arr) Py_ssize_t increment = arr.strides[0] // 8 - ndarray[int64_t] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_INT64, 0) + ndarray[int64_t] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_INT64, + 0) _period_asfreq( cnp.PyArray_DATA(arr), @@ -1362,7 +1363,6 @@ def get_period_field_arr(str field, const int64_t[:] arr, int freq): cdef: Py_ssize_t i, sz int64_t[::1] out - accessor f func = _get_accessor_func(field) if func is NULL: @@ -1438,7 +1438,8 @@ def extract_ordinals(ndarray values, freq) -> np.ndarray: cdef: Py_ssize_t i, n = values.size int64_t ordinal - ndarray ordinals = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_INT64, 0) + ndarray ordinals = cnp.PyArray_EMPTY(values.ndim, values.shape, + cnp.NPY_INT64, 0) cnp.broadcast mi = cnp.PyArray_MultiIterNew2(ordinals, values) object p @@ -2478,7 +2479,8 @@ class Period(_Period): the start or the end of the period, but rather the entire period itself. freq : str, default None One of pandas period strings or corresponding objects. Accepted - strings are listed in the :ref:`offset alias section ` in the user docs. + strings are listed in the :ref:`offset alias section ` + in the user docs. ordinal : int, default None The period offset from the proleptic Gregorian epoch. year : int, default None @@ -2511,7 +2513,6 @@ class Period(_Period): # ('T', 5) but may be passed in as a string like '5T' # ordinal is the period offset from the gregorian proleptic epoch - cdef _Period self if freq is not None: freq = cls._maybe_convert_freq(freq) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 6287c2fbc5d34..f540ad19c48d2 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -75,7 +75,6 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai int iso_week, iso_year int64_t us, ns object val, group_key, ampm, found, timezone - dict found_key bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f3de67b705d4d..297a291a52a66 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -176,7 +176,8 @@ def ints_to_pytimedelta(ndarray m8values, box=False): # `it` iterates C-order as well, so the iteration matches # See discussion at # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 - ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0) + ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, + cnp.NPY_OBJECT, 0) object[::1] res_flat = result.ravel() # should NOT be a copy ndarray arr = m8values.view("i8") @@ -468,7 +469,8 @@ cdef inline int64_t _item_to_timedelta64_fastpath(object item) except? -1: return parse_timedelta_string(item) -cdef inline int64_t _item_to_timedelta64(object item, str parsed_unit, str errors) except? -1: +cdef inline int64_t _item_to_timedelta64(object item, str parsed_unit, str errors) + except? -1: """ See array_to_timedelta64. """ @@ -1570,7 +1572,7 @@ class Timedelta(_Timedelta): "milliseconds", "microseconds", "nanoseconds"} def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta td_base + cdef _Timedelta if value is _no_input: if not len(kwargs): @@ -1625,7 +1627,8 @@ class Timedelta(_Timedelta): if len(kwargs): # GH#48898 raise ValueError( - "Cannot pass both a Timedelta input and timedelta keyword arguments, got " + "Cannot pass both a Timedelta input and timedelta keyword + arguments, got " f"{list(kwargs.keys())}" ) return value @@ -1712,7 +1715,7 @@ class Timedelta(_Timedelta): @cython.cdivision(True) def _round(self, freq, mode): cdef: - int64_t result, unit, remainder + int64_t result, unit, ndarray[int64_t] arr from pandas._libs.tslibs.offsets import to_offset @@ -1802,7 +1805,7 @@ class Timedelta(_Timedelta): def __truediv__(self, other): cdef: - int64_t new_value + int64_t if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3c3bb8496aa6e..f16fd80aaaae2 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -267,7 +267,6 @@ cdef class _Timestamp(ABCTimestamp): @classmethod def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso, tzinfo tz): cdef: - npy_datetimestruct dts _TSObject obj = _TSObject() if value == NPY_NAT: @@ -294,7 +293,6 @@ cdef class _Timestamp(ABCTimestamp): # This is herely mainly so we can incrementally implement non-nano # (e.g. only tznaive at first) cdef: - npy_datetimestruct dts int64_t value NPY_DATETIMEUNIT reso @@ -317,7 +315,6 @@ cdef class _Timestamp(ABCTimestamp): def __richcmp__(_Timestamp self, object other, int op): cdef: _Timestamp ots - int ndim if isinstance(other, _Timestamp): ots = other @@ -1532,7 +1529,7 @@ class Timestamp(_Timestamp): if (is_integer_object(tz) and is_integer_object(ts_input) and is_integer_object(freq) - ): + ): # GH#31929 e.g. Timestamp(2019, 3, 4, 5, 6, tzinfo=foo) # TODO(GH#45307): this will still be fragile to # mixed-and-matched positional/keyword arguments @@ -1675,7 +1672,8 @@ class Timestamp(_Timestamp): if not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.creso) + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, + ts.creso) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): cdef: diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index e2812178a2b43..9fea860500b2e 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -224,14 +224,13 @@ timedelta-like} """ cdef: ndarray[uint8_t, cast=True] ambiguous_array - Py_ssize_t i, idx, pos, n = vals.shape[0] - Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right + Py_ssize_t i, n = vals.shape[0] + Py_ssize_t delta_idx_offset, delta_idx, int64_t v, left, right, val, new_local, remaining_mins int64_t first_delta, delta int64_t shift_delta = 0 ndarray[int64_t] result_a, result_b, dst_hours int64_t[::1] result - npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint shift_forward = False, shift_backward = False bint fill_nonexist = False diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 6a6b156af3dc4..0a16cf38eecca 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -155,7 +155,7 @@ def ints_to_pydatetime( elif box == "timestamp": use_ts = True elif box == "time": - use_time = True + # use_time = True elif box == "datetime": use_pydt = True else: diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 68c05f2bb2c98..589dcd0eb3b54 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -172,7 +172,8 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start, add_sum(values[j], &nobs, &sum_x, &compensation_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value) + output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, + prev_value) if not is_monotonic_increasing_bounds: nobs = 0 @@ -296,7 +297,8 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value) + output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, + prev_value) if not is_monotonic_increasing_bounds: nobs = 0 @@ -566,7 +568,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j - float64_t val, prev, min_val, mean_val, sum_val = 0 + float64_t val, min_val, mean_val, sum_val = 0 float64_t compensation_xxx_add, compensation_xxx_remove float64_t compensation_xx_add, compensation_xx_remove float64_t compensation_x_add, compensation_x_remove @@ -574,7 +576,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, float64_t prev_value int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0 int64_t s, e, num_consecutive_same_value - ndarray[float64_t] output, mean_array, values_copy + ndarray[float64_t] output, values_copy bint is_monotonic_increasing_bounds minp = max(minp, 3) @@ -779,7 +781,7 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j - float64_t val, prev, mean_val, min_val, sum_val = 0 + float64_t val, mean_val, min_val, sum_val = 0 float64_t compensation_xxxx_add, compensation_xxxx_remove float64_t compensation_xxx_remove, compensation_xxx_add float64_t compensation_xx_remove, compensation_xx_add @@ -876,7 +878,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, bint err = False, is_monotonic_increasing_bounds int midpoint, ret = 0 int64_t nobs = 0, N = len(start), s, e, win - float64_t val, res, prev + float64_t val, res, skiplist_t *sl ndarray[float64_t] output @@ -1149,7 +1151,7 @@ def roll_quantile(const float64_t[:] values, ndarray[int64_t] start, Py_ssize_t i, j, s, e, N = len(start), idx int ret = 0 int64_t nobs = 0, win - float64_t val, prev, midpoint, idx_with_fraction + float64_t val, idx_with_fraction float64_t vlow, vhigh skiplist_t *skiplist InterpolationType interpolation_type @@ -1275,7 +1277,7 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start, derived from roll_quantile """ cdef: - Py_ssize_t i, j, s, e, N = len(start), idx + Py_ssize_t i, j, s, e, N = len(start), float64_t rank_min = 0, rank = 0 int64_t nobs = 0, win float64_t val diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 3e4f116953cb3..89ac1c10254cb 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -283,7 +283,7 @@ class SettingWithCopyError(ValueError): The ``mode.chained_assignment`` needs to be set to set to 'raise.' This can happen unintentionally when chained indexing. - For more information on eveluation order, + For more information on evaluation order, see :ref:`the user guide`. For more information on view vs. copy, @@ -306,7 +306,7 @@ class SettingWithCopyWarning(Warning): 'Warn' is the default option. This can happen unintentionally when chained indexing. - For more information on eveluation order, + For more information on evaluation order, see :ref:`the user guide`. For more information on view vs. copy, diff --git a/pandas/io/sas/byteswap.pyx b/pandas/io/sas/byteswap.pyx index 4620403910274..79359ce5411b5 100644 --- a/pandas/io/sas/byteswap.pyx +++ b/pandas/io/sas/byteswap.pyx @@ -1,5 +1,6 @@ """ -The following are faster versions of struct.unpack that avoid the overhead of Python function calls. +The following are faster versions of struct.unpack that avoid the overhead of Python +function calls. In the SAS7BDAT parser, they may be called up to (n_rows * n_cols) times. """ diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 9406900b69998..ea70453c43c11 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -253,8 +253,10 @@ cdef: def _init_subheader_signatures(): - subheaders_32bit = [(sig, idx) for sig, idx in const.subheader_signature_to_index.items() if len(sig) == 4] - subheaders_64bit = [(sig, idx) for sig, idx in const.subheader_signature_to_index.items() if len(sig) == 8] + subheaders_32bit = [(sig, idx) for sig, idx in + const.subheader_signature_to_index.items() if len(sig) == 4] + subheaders_64bit = [(sig, idx) for sig, idx in + const.subheader_signature_to_index.items() if len(sig) == 8] assert len(subheaders_32bit) == 13 assert len(subheaders_64bit) == 17 assert len(const.subheader_signature_to_index) == 13 + 17 @@ -366,7 +368,6 @@ cdef class Parser: def read(self, int nrows): cdef: bint done - int i for _ in range(nrows): done = self.readline() @@ -490,7 +491,8 @@ cdef class Parser: rpos = self.decompress(source, decompressed_source) if rpos != self.row_length: raise ValueError( - f"Expected decompressed line of length {self.row_length} bytes but decompressed {rpos} bytes" + f"Expected decompressed line of length {self.row_length} bytes but + decompressed {rpos} bytes" ) source = decompressed_source From 9a1a81bb6c80cbc24a1d1a7430120faec64b8f6c Mon Sep 17 00:00:00 2001 From: ram vikram singh Date: Tue, 1 Nov 2022 17:06:49 +0530 Subject: [PATCH 02/17] update --- pandas/_libs/algos.pyx | 6 +++--- pandas/_libs/groupby.pyx | 8 ++++---- pandas/_libs/lib.pyx | 24 ++++++++++++------------ pandas/_libs/testing.pyx | 6 +++--- pandas/_libs/tslibs/fields.pyx | 2 +- pandas/_libs/tslibs/nattype.pyx | 6 +++--- pandas/_libs/tslibs/offsets.pyx | 8 ++++---- pandas/_libs/tslibs/parsing.pyx | 2 +- pandas/_libs/tslibs/period.pyx | 11 ++++++----- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- pandas/_libs/window/aggregations.pyx | 20 ++++++++++++-------- pandas/io/sas/byteswap.pyx | 2 +- 12 files changed, 53 insertions(+), 48 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index d764ddd515301..61f89185b0687 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -95,7 +95,7 @@ class Infinity: def __gt__(self, other): return (not isinstance(other, Infinity) and - not missing.checknull(other)) + not missing.checknull(other)) def __ge__(self, other): return not missing.checknull(other) @@ -107,7 +107,7 @@ class NegInfinity: """ def __lt__(self, other): return (not isinstance(other, NegInfinity) and - not missing.checknull(other)) + not missing.checknull(other)) def __le__(self, other): return not missing.checknull(other) @@ -768,7 +768,7 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike): n = len(arr) if n == 1: - if arr[0] != arr[0] or (numeric_object_t is int64_t and timelike and + if arr[0] != arr[0] or (numeric_object_t is int64_t and timelike and arr[0] == NPY_NAT): # single value is NaN return False, False, True diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 5b50aaf87cdc3..01b655f32b548 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1367,8 +1367,8 @@ def group_last( # set a placeholder value in out[i, j]. if uses_mask: result_mask[i, j] = True - elif numeric_object_t is float32_t or numeric_object_t is - float64_t: + elif (numeric_object_t is float32_t or + numeric_object_t is float64_t): out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in @@ -1488,8 +1488,8 @@ def group_nth( # it was initialized with np.empty. Also ensures # we can downcast out if appropriate. out[i, j] = 0 - elif numeric_object_t is float32_t or numeric_object_t is - float64_t: + elif (numeric_object_t is float32_t or + numeric_object_t is float64_t): out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1519255c622a9..319abf8118561 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -767,7 +767,7 @@ def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, Int64 (datetime64) version of generic python version in ``groupby.py``. """ cdef: - Py_ssize_t lenidx, lenbin, i, j, bc, + Py_ssize_t lenidx, lenbin, i, j, bc, ndarray[int64_t, ndim=1] bins int64_t, r_bin, nat_count bint right_closed = closed == 'right' @@ -2217,16 +2217,16 @@ def maybe_convert_numeric( int maybe_int Py_ssize_t i, n = values.size Seen seen = Seen(coerce_numeric) - ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_FLOAT64, + ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_FLOAT64, 0) - ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, - cnp.NPY_COMPLEX128, 0) - ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_INT64, - 0) - ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT64, - 0) - ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT8, - 0) + ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, + cnp.NPY_COMPLEX128, 0) + ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, + cnp.NPY_INT64, 0) + ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, + cnp.NPY_UINT64, 0) + ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, + cnp.NPY_UINT8, 0) ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") float64_t fval bint allow_null_in_int = convert_to_masked_nullable @@ -2924,7 +2924,7 @@ def to_object_array(rows: object, min_width: int = 0) -> ndarray: def tuples_to_object_array(ndarray[object] tuples): cdef: - Py_ssize_t i, j, n, k, + Py_ssize_t i, j, n, k, ndarray[object, ndim=2] result tuple tup @@ -3052,7 +3052,7 @@ cpdef ndarray eq_NA_compat(ndarray[object] arr, object key): key is assumed to have `not isna(key)` """ cdef: - ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, + ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_BOOL, 0) Py_ssize_t i object item diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 20bb065ef0c49..dbfb8a96c495f 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -161,14 +161,14 @@ cpdef assert_almost_equal(a, b, is_unequal = True diff += 1 if not first_diff: - first_diff = f"At positional index {i}, - first diff: {a[i]} != {b[i]}" + first_diff = (f"At positional index {i}, " + f"first diff: {a[i]} != {b[i]}") if is_unequal: from pandas._testing import raise_assert_detail msg = (f"{obj} values are different " f"({np.round(diff * 100.0 / na, 5)} %)") - raise_assert_detail(obj, msg, lobj, robj, first_diff=first_diff, + raise_assert_detail(obj, msg, lobj, robj, first_diff=first_diff, index_values=index_values) return True diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index be39ec7334be1..b4700c494a3fa 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -325,7 +325,7 @@ def get_start_end_field( @cython.wraparound(False) @cython.boundscheck(False) -def get_date_field(const int64_t[:] dtindex, str field, +def get_date_field(const int64_t[:] dtindex, str field,. NPY_DATETIMEUNIT reso=NPY_FR_ns): """ Given a int64-based datetime index, extract the year, month, etc., diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index e25703e870595..877979239bfa3 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -204,7 +204,7 @@ cdef class _NaT(datetime): return result """__rsub__ logic here - TODO(cython3): remove this, move above code out of + TODO(cython3): remove this, move above code out of ``if not is_rsub`` block timedelta64 - NaT we have to treat NaT as timedelta64 for this to be meaningful, and the result is timedelta64""" @@ -241,8 +241,8 @@ cdef class _NaT(datetime): result = np.empty(other.shape, dtype="timedelta64[ns]") result.fill("NaT") return result - """" other cases are same, swap operands is allowed even though we subtract - because this is NaT""" + # other cases are same, swap operands is allowed even though we subtract + # because this is NaT return self.__sub__(other) def __pos__(self): diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 50f0b4efcba42..d6abeacb6f878 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2268,7 +2268,7 @@ cdef class QuarterOffset(SingleConstructorOffset): def _apply_array(self, dtarr): reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_quarters( - dtarr.view("i8"), self.n, self.startingMonth, self._day_opt, modby=3, + dtarr.view("i8"), self.n, self.startingMonth, self._day_opt, modby=3, reso=reso ) return shifted @@ -2549,7 +2549,7 @@ cdef class SemiMonthOffset(SingleConstructorOffset): ndarray i8other = dtarr.view("i8") Py_ssize_t i, count = dtarr.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, + ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) npy_datetimestruct dts int months, to_day, nadj, n = self.n @@ -2758,7 +2758,7 @@ cdef class Week(SingleConstructorOffset): cdef: Py_ssize_t i, count = i8other.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, + ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) npy_datetimestruct dts int wday, days, weeks, n = self.n @@ -4149,7 +4149,7 @@ cdef ndarray _shift_bdays( """ cdef: Py_ssize_t i, n = i8other.size - ndarray result = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, + ndarray result = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) int64_t val, res_val int wday, nadj, days diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 966a22e8d85bf..ed65d36b01cd5 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -418,7 +418,7 @@ cdef parse_datetime_string_with_reso( from pandas import Timestamp parsed = Timestamp(date_string) else: - parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, + parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us) reso = { NPY_DATETIMEUNIT.NPY_FR_Y: "year", diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e709256d81494..ecfaf0f96cf6d 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1053,7 +1053,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): cdef: Py_ssize_t n = len(arr) Py_ssize_t increment = arr.strides[0] // 8 - ndarray[int64_t] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_INT64, + ndarray[int64_t] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_INT64, 0) _period_asfreq( @@ -1438,7 +1438,7 @@ def extract_ordinals(ndarray values, freq) -> np.ndarray: cdef: Py_ssize_t i, n = values.size int64_t ordinal - ndarray ordinals = cnp.PyArray_EMPTY(values.ndim, values.shape, + ndarray ordinals = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_INT64, 0) cnp.broadcast mi = cnp.PyArray_MultiIterNew2(ordinals, values) object p @@ -1685,7 +1685,8 @@ cdef class _Period(PeriodMixin): raise IncompatibleFrequency("Input cannot be converted to " f"Period(freq={self.freqstr})") - if util.is_timedelta64_object(other) and get_timedelta64_value(other) == NPY_NAT: + if util.is_timedelta64_object(other) and + get_timedelta64_value(other) == NPY_NAT: # i.e. np.timedelta64("nat") return NaT @@ -2479,8 +2480,8 @@ class Period(_Period): the start or the end of the period, but rather the entire period itself. freq : str, default None One of pandas period strings or corresponding objects. Accepted - strings are listed in the :ref:`offset alias section ` - in the user docs. + strings are listed in the :ref:`offset alias section + ` in the user docs. ordinal : int, default None The period offset from the proleptic Gregorian epoch. year : int, default None diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index a66faec07dfbf..c6842ce804b2f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -366,8 +366,8 @@ cdef class _Timestamp(ABCTimestamp): elif op == Py_NE: return True raise TypeError("Cannot compare Timestamp with datetime.date. " - "Use ts == pd.Timestamp(date) or ts.date() == date instead." - ) + "Use ts == pd.Timestamp(date) or ts.date() == date instead." + ) else: return NotImplemented @@ -1670,7 +1670,7 @@ class Timestamp(_Timestamp): if not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.creso) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 589dcd0eb3b54..8ee6a93eeba77 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -172,7 +172,7 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start, add_sum(values[j], &nobs, &sum_x, &compensation_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, + output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value) if not is_monotonic_increasing_bounds: @@ -212,7 +212,8 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct, float64_t *compensation, - int64_t *num_consecutive_same_value, float64_t *prev_value) nogil: + int64_t *num_consecutive_same_value, float64_t *prev_value) + nogil: """ add a value from the mean calc using Kahan summation """ cdef: float64_t y, t @@ -297,7 +298,7 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, + output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value) if not is_monotonic_increasing_bounds: @@ -312,7 +313,8 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, - float64_t ssqdm_x, int64_t num_consecutive_same_value) nogil: + float64_t ssqdm_x, int64_t num_consecutive_same_value) + nogil: cdef: float64_t result @@ -332,7 +334,8 @@ cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, float64_t *ssqdm_x, float64_t *compensation, - int64_t *num_consecutive_same_value, float64_t *prev_value) nogil: + int64_t *num_consecutive_same_value, float64_t *prev_value) + nogil: """ add a value from the var calc """ cdef: float64_t delta, prev_mean, y, t @@ -855,7 +858,8 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, &compensation_xxx_add, &compensation_xxxx_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value) + output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, + num_consecutive_same_value) if not is_monotonic_increasing_bounds: nobs = 0 @@ -878,7 +882,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, bint err = False, is_monotonic_increasing_bounds int midpoint, ret = 0 int64_t nobs = 0, N = len(start), s, e, win - float64_t val, res, + float64_t val, res, skiplist_t *sl ndarray[float64_t] output @@ -1277,7 +1281,7 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start, derived from roll_quantile """ cdef: - Py_ssize_t i, j, s, e, N = len(start), + Py_ssize_t i, j, s, e, N = len(start), float64_t rank_min = 0, rank = 0 int64_t nobs = 0, win float64_t val diff --git a/pandas/io/sas/byteswap.pyx b/pandas/io/sas/byteswap.pyx index 79359ce5411b5..2a4d3f66a5d7d 100644 --- a/pandas/io/sas/byteswap.pyx +++ b/pandas/io/sas/byteswap.pyx @@ -1,5 +1,5 @@ """ -The following are faster versions of struct.unpack that avoid the overhead of Python +The following are faster versions of struct.unpack that avoid the overhead of Python function calls. In the SAS7BDAT parser, they may be called up to (n_rows * n_cols) times. From c25d0a9088152ed0c6e8ddcbc7c83ea952f238f1 Mon Sep 17 00:00:00 2001 From: ram vikram singh Date: Tue, 1 Nov 2022 17:52:00 +0530 Subject: [PATCH 03/17] updates --- pandas/_libs/algos.pyx | 4 +-- pandas/_libs/lib.pyx | 13 ++++----- pandas/_libs/tslibs/timestamps.pyx | 5 ++-- pandas/_libs/window/aggregations.pyx | 40 +++++++++++++++++++--------- 4 files changed, 38 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 61f89185b0687..7b9fe6422544c 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -95,7 +95,7 @@ class Infinity: def __gt__(self, other): return (not isinstance(other, Infinity) and - not missing.checknull(other)) + not missing.checknull(other)) def __ge__(self, other): return not missing.checknull(other) @@ -107,7 +107,7 @@ class NegInfinity: """ def __lt__(self, other): return (not isinstance(other, NegInfinity) and - not missing.checknull(other)) + not missing.checknull(other)) def __le__(self, other): return not missing.checknull(other) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 319abf8118561..6cb5e711ad2b3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2217,15 +2217,16 @@ def maybe_convert_numeric( int maybe_int Py_ssize_t i, n = values.size Seen seen = Seen(coerce_numeric) - ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_FLOAT64, - 0) + ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, + cnp.NPY_FLOAT64, 0) ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, - cnp.NPY_COMPLEX128, 0) - ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, + cnp.NPY_COMPLEX128, + 0) + ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_INT64, 0) - ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, + ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT64, 0) - ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, + ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT8, 0) ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") float64_t fval diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c6842ce804b2f..1e31a0bf2e199 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1525,9 +1525,8 @@ class Timestamp(_Timestamp): # by the cython annotation. if tz is not None: if (is_integer_object(tz) - and is_integer_object(ts_input) - and is_integer_object(freq) - ): + and is_integer_object(ts_input) + and is_integer_object(freq)): # GH#31929 e.g. Timestamp(2019, 3, 4, 5, 6, tzinfo=foo) # TODO(GH#45307): this will still be fragile to # mixed-and-matched positional/keyword arguments diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 8ee6a93eeba77..76d1c6cbdd952 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -210,10 +210,15 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct return result -cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, - Py_ssize_t *neg_ct, float64_t *compensation, - int64_t *num_consecutive_same_value, float64_t *prev_value) - nogil: +cdef inline void add_mean( + float64_t val, + Py_ssize_t *nobs, + float64_t *sum_x, + Py_ssize_t *neg_ct, + float64_t *compensation, + int64_t *num_consecutive_same_value, + float64_t *prev_value) + nogil: """ add a value from the mean calc using Kahan summation """ cdef: float64_t y, t @@ -313,8 +318,7 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, - float64_t ssqdm_x, int64_t num_consecutive_same_value) - nogil: + float64_t ssqdm_x, int64_t num_consecutive_same_value) nogil: cdef: float64_t result @@ -332,10 +336,15 @@ cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, return result -cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, - float64_t *ssqdm_x, float64_t *compensation, - int64_t *num_consecutive_same_value, float64_t *prev_value) - nogil: +cdef inline void add_var( + float64_t val, + float64_t *nobs, + float64_t *mean_x, + float64_t *ssqdm_x, + float64_t *compensation, + int64_t *num_consecutive_same_value, + float64_t *prev_value) + nogil: """ add a value from the var calc """ cdef: float64_t delta, prev_mean, y, t @@ -369,8 +378,13 @@ cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0]) -cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, - float64_t *ssqdm_x, float64_t *compensation) nogil: +cdef inline void remove_var( + float64_t val, + float64_t *nobs, + float64_t *mean_x, + float64_t *ssqdm_x, + float64_t *compensation) + nogil: """ remove a value from the var calc """ cdef: float64_t delta, prev_mean, y, t @@ -858,7 +872,7 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, &compensation_xxx_add, &compensation_xxxx_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, + output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value) if not is_monotonic_increasing_bounds: From 167b94278f8f800aec5ec074d896fe596eb2691c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 1 Nov 2022 11:16:42 +0000 Subject: [PATCH 04/17] fixup --- pandas/_libs/groupby.pyx | 26 ++++++++++------- pandas/_libs/lib.pyx | 2 -- pandas/_libs/tslib.pyx | 10 ++++--- pandas/_libs/tslibs/dtypes.pyx | 5 ++-- pandas/_libs/tslibs/fields.pyx | 7 +++-- pandas/_libs/tslibs/np_datetime.pyx | 8 ++++-- pandas/_libs/tslibs/period.pyx | 8 ++++-- pandas/_libs/tslibs/timedeltas.pyx | 21 +++++++------- pandas/_libs/tslibs/vectorized.pyx | 4 +-- pandas/_libs/window/aggregations.pyx | 43 ++++++++++++++++------------ pandas/io/sas/sas.pyx | 18 ++++++++---- 11 files changed, 86 insertions(+), 66 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 01b655f32b548..c50e3766b1c8d 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -265,7 +265,7 @@ def group_cumprod( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, + Py_ssize_t i, j, N, K, int64float_t val, na_val int64float_t[:, ::1] accum intp_t lab @@ -356,7 +356,7 @@ def group_cumsum( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, + Py_ssize_t i, j, N, K, int64float_t val, y, t, na_val int64float_t[:, ::1] accum, compensation uint8_t[:, ::1] accum_mask @@ -770,8 +770,12 @@ def group_sum( # set a placeholder value in out[i, j]. if uses_mask: result_mask[i, j] = True - elif (sum_t is float32_t or sum_t is float64_t - or sum_t is complex64_t or sum_t is complex64_t): + elif ( + sum_t is float32_t + or sum_t is float64_t + or sum_t is complex64_t + or sum_t is complex64_t + ): out[i, j] = NAN elif sum_t is int64_t: out[i, j] = NPY_NAT @@ -799,7 +803,7 @@ def group_prod( """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - int64float_t val, count + int64float_t val int64float_t[:, ::1] prodx int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) @@ -872,7 +876,7 @@ def group_var( floating[:, ::1] mean int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = not mask is None + bint isna_entry, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -969,7 +973,7 @@ def group_mean( mean_t[:, ::1] sumx, compensation int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = not mask is None + bint isna_entry, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -1045,7 +1049,7 @@ def group_ohlc( Py_ssize_t i, N, K, lab int64float_t val uint8_t[::1] first_element_set - bint isna_entry, uses_mask = is not mask is None + bint isna_entry, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -1240,7 +1244,7 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: return False -cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, +cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike): """ Find either the min or the max supported by numeric_object_t; 'val' is a @@ -1367,7 +1371,7 @@ def group_last( # set a placeholder value in out[i, j]. if uses_mask: result_mask[i, j] = True - elif (numeric_object_t is float32_t or + elif (numeric_object_t is float32_t or numeric_object_t is float64_t): out[i, j] = NAN elif numeric_object_t is int64_t: @@ -1488,7 +1492,7 @@ def group_nth( # it was initialized with np.empty. Also ensures # we can downcast out if appropriate. out[i, j] = 0 - elif (numeric_object_t is float32_t or + elif (numeric_object_t is float32_t or numeric_object_t is float64_t): out[i, j] = NAN elif numeric_object_t is int64_t: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6cb5e711ad2b3..c17489e80b226 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2306,8 +2306,6 @@ def maybe_convert_numeric( seen.float_ = True else: try: - # status = floatify(val, &fval, &maybe_int) - if fval in na_values: seen.saw_null() floats[i] = complexes[i] = NaN diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5c2504e7ef78a..3bf33407e34c1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -610,8 +610,8 @@ cpdef array_to_datetime( continue elif is_raise: raise ValueError( - f"time data \"{val}\" at position {i} doesn't match - format specified" + f"time data \"{val}\" at position {i} doesn't " + "match format specified" ) return values, tz_out @@ -627,8 +627,10 @@ cpdef array_to_datetime( if is_coerce: iresult[i] = NPY_NAT continue - raise TypeError(f"invalid string coercion to datetime for \"{val}\" - at position {i}") + raise TypeError( + f"invalid string coercion to datetime for \"{val}\" " + f"at position {i}" + ) if tz is not None: seen_datetime_offset = True diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 0693a142ec4f0..357227de2fc2c 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -396,8 +396,9 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil: # TODO: use in _matplotlib.converter? -cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) - except? -1: +cpdef int64_t periods_per_day( + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns +) except? -1: """ How many of the given time units fit into a single day? """ diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index b4700c494a3fa..e14c39f5e155a 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -325,8 +325,11 @@ def get_start_end_field( @cython.wraparound(False) @cython.boundscheck(False) -def get_date_field(const int64_t[:] dtindex, str field,. - NPY_DATETIMEUNIT reso=NPY_FR_ns): +def get_date_field( + const int64_t[:] dtindex, + str field, + NPY_DATETIMEUNIT reso=NPY_FR_ns +): """ Given a int64-based datetime index, extract the year, month, etc., field and return an array of these values. diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index bf5cdd4a0de68..9a13b3ef255ad 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -171,7 +171,7 @@ class OutOfBoundsTimedelta(ValueError): pass -cdef get_implementation_bounds(NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, +cdef get_implementation_bounds(NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper): if reso == NPY_FR_ns: upper[0] = _NS_MAX_DTS @@ -511,8 +511,10 @@ cdef ndarray astype_round_check( @cython.overflowcheck(True) -cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) - except? -1: +cdef int64_t get_conversion_factor( + NPY_DATETIMEUNIT from_unit, + NPY_DATETIMEUNIT to_unit +) except? -1: """ Find the factor by which we need to multiply to convert from from_unit to to_unit. """ diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index ecfaf0f96cf6d..dfbb97ad9a762 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1685,8 +1685,10 @@ cdef class _Period(PeriodMixin): raise IncompatibleFrequency("Input cannot be converted to " f"Period(freq={self.freqstr})") - if util.is_timedelta64_object(other) and - get_timedelta64_value(other) == NPY_NAT: + if ( + util.is_timedelta64_object(other) and + get_timedelta64_value(other) == NPY_NAT + ): # i.e. np.timedelta64("nat") return NaT @@ -2480,7 +2482,7 @@ class Period(_Period): the start or the end of the period, but rather the entire period itself. freq : str, default None One of pandas period strings or corresponding objects. Accepted - strings are listed in the :ref:`offset alias section + strings are listed in the :ref:`offset alias section ` in the user docs. ordinal : int, default None The period offset from the proleptic Gregorian epoch. diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 297a291a52a66..9a1bc2f289057 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -176,7 +176,7 @@ def ints_to_pytimedelta(ndarray m8values, box=False): # `it` iterates C-order as well, so the iteration matches # See discussion at # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 - ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, + ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0) object[::1] res_flat = result.ravel() # should NOT be a copy @@ -469,8 +469,11 @@ cdef inline int64_t _item_to_timedelta64_fastpath(object item) except? -1: return parse_timedelta_string(item) -cdef inline int64_t _item_to_timedelta64(object item, str parsed_unit, str errors) - except? -1: +cdef inline int64_t _item_to_timedelta64( + object item, + str parsed_unit, + str errors +) except? -1: """ See array_to_timedelta64. """ @@ -969,7 +972,6 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): "Only resolutions 's', 'ms', 'us', 'ns' are supported." ) - td_base.value = value td_base._is_populated = 0 td_base._creso = reso @@ -1572,7 +1574,7 @@ class Timedelta(_Timedelta): "milliseconds", "microseconds", "nanoseconds"} def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta + cdef _Timedelta if value is _no_input: if not len(kwargs): @@ -1627,8 +1629,8 @@ class Timedelta(_Timedelta): if len(kwargs): # GH#48898 raise ValueError( - "Cannot pass both a Timedelta input and timedelta keyword - arguments, got " + "Cannot pass both a Timedelta input and timedelta keyword " + "arguments, got " f"{list(kwargs.keys())}" ) return value @@ -1715,7 +1717,7 @@ class Timedelta(_Timedelta): @cython.cdivision(True) def _round(self, freq, mode): cdef: - int64_t result, unit, + int64_t result, unit, ndarray[int64_t] arr from pandas._libs.tslibs.offsets import to_offset @@ -1804,9 +1806,6 @@ class Timedelta(_Timedelta): __rmul__ = __mul__ def __truediv__(self, other): - cdef: - int64_t - if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") other = Timedelta(other) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 0a16cf38eecca..75c17d9ad5e1f 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -138,7 +138,7 @@ def ints_to_pydatetime( npy_datetimestruct dts tzinfo new_tz - bint use_date = False, use_time = False, use_ts = False, use_pydt = False + bint use_date = False, use_ts = False, use_pydt = False object res_val # Note that `result` (and thus `result_flat`) is C-order and @@ -154,8 +154,6 @@ def ints_to_pydatetime( use_date = True elif box == "timestamp": use_ts = True - elif box == "time": - # use_time = True elif box == "datetime": use_pydt = True else: diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 76d1c6cbdd952..edb1f46b27325 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -211,14 +211,14 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct cdef inline void add_mean( - float64_t val, - Py_ssize_t *nobs, + float64_t val, + Py_ssize_t *nobs, float64_t *sum_x, - Py_ssize_t *neg_ct, + Py_ssize_t *neg_ct, float64_t *compensation, - int64_t *num_consecutive_same_value, - float64_t *prev_value) - nogil: + int64_t *num_consecutive_same_value, + float64_t *prev_value +) nogil: """ add a value from the mean calc using Kahan summation """ cdef: float64_t y, t @@ -317,8 +317,13 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, # Rolling variance -cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, - float64_t ssqdm_x, int64_t num_consecutive_same_value) nogil: +cdef inline float64_t calc_var( + int64_t minp, + int ddof, + float64_t nobs, + float64_t ssqdm_x, + int64_t num_consecutive_same_value +) nogil: cdef: float64_t result @@ -337,14 +342,14 @@ cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, cdef inline void add_var( - float64_t val, - float64_t *nobs, + float64_t val, + float64_t *nobs, float64_t *mean_x, - float64_t *ssqdm_x, + float64_t *ssqdm_x, float64_t *compensation, - int64_t *num_consecutive_same_value, - float64_t *prev_value) - nogil: + int64_t *num_consecutive_same_value, + float64_t *prev_value, +) nogil: """ add a value from the var calc """ cdef: float64_t delta, prev_mean, y, t @@ -379,12 +384,12 @@ cdef inline void add_var( cdef inline void remove_var( - float64_t val, - float64_t *nobs, + float64_t val, + float64_t *nobs, float64_t *mean_x, - float64_t *ssqdm_x, - float64_t *compensation) - nogil: + float64_t *ssqdm_x, + float64_t *compensation +) nogil: """ remove a value from the var calc """ cdef: float64_t delta, prev_mean, y, t diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index ea70453c43c11..944561031fdba 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -253,10 +253,16 @@ cdef: def _init_subheader_signatures(): - subheaders_32bit = [(sig, idx) for sig, idx in - const.subheader_signature_to_index.items() if len(sig) == 4] - subheaders_64bit = [(sig, idx) for sig, idx in - const.subheader_signature_to_index.items() if len(sig) == 8] + subheaders_32bit = [ + (sig, idx) + for sig, idx in const.subheader_signature_to_index.items() + if len(sig) == 4 + ] + subheaders_64bit = [ + (sig, idx) + for sig, idx in const.subheader_signature_to_index.items() + if len(sig) == 8 + ] assert len(subheaders_32bit) == 13 assert len(subheaders_64bit) == 17 assert len(const.subheader_signature_to_index) == 13 + 17 @@ -491,8 +497,8 @@ cdef class Parser: rpos = self.decompress(source, decompressed_source) if rpos != self.row_length: raise ValueError( - f"Expected decompressed line of length {self.row_length} bytes but - decompressed {rpos} bytes" + f"Expected decompressed line of length {self.row_length} bytes " + f"but decompressed {rpos} bytes" ) source = decompressed_source From 502403d957f52a16396574f40f7fe102d412a598 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 1 Nov 2022 11:25:32 +0000 Subject: [PATCH 05/17] fixup --- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/lib.pyx | 28 ++++++++++++++++------------ pandas/_libs/tslibs/nattype.pyx | 10 +++++----- pandas/_libs/tslibs/period.pyx | 9 +++++---- pandas/_libs/tslibs/timedeltas.pyx | 4 +--- pandas/_libs/tslibs/timestamps.pyx | 8 +++++--- pandas/_libs/window/aggregations.pyx | 4 ++-- 7 files changed, 35 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index c50e3766b1c8d..afe0eb430b37c 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -265,7 +265,7 @@ def group_cumprod( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, + Py_ssize_t i, j, N, K int64float_t val, na_val int64float_t[:, ::1] accum intp_t lab diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c17489e80b226..658b810e78187 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2217,17 +2217,21 @@ def maybe_convert_numeric( int maybe_int Py_ssize_t i, n = values.size Seen seen = Seen(coerce_numeric) - ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, - cnp.NPY_FLOAT64, 0) - ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, - cnp.NPY_COMPLEX128, - 0) - ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, - cnp.NPY_INT64, 0) - ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, - cnp.NPY_UINT64, 0) - ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, - cnp.NPY_UINT8, 0) + ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_FLOAT64, 0 + ) + ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_COMPLEX128, 0 + ) + ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_INT64, 0 + ) + ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_UINT64, 0 + ) + ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_UINT8, 0 + ) ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") float64_t fval bint allow_null_in_int = convert_to_masked_nullable @@ -2923,7 +2927,7 @@ def to_object_array(rows: object, min_width: int = 0) -> ndarray: def tuples_to_object_array(ndarray[object] tuples): cdef: - Py_ssize_t i, j, n, k, + Py_ssize_t i, j, n, k ndarray[object, ndim=2] result tuple tup diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 877979239bfa3..26cd332c3007a 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -203,11 +203,11 @@ cdef class _NaT(datetime): result.fill("NaT") return result - """__rsub__ logic here - TODO(cython3): remove this, move above code out of - ``if not is_rsub`` block - timedelta64 - NaT we have to treat NaT as timedelta64 - for this to be meaningful, and the result is timedelta64""" + # __rsub__ logic here + # TODO(cython3): remove this, move above code out of + # ``if not is_rsub`` block + # timedelta64 - NaT we have to treat NaT as timedelta64 + # for this to be meaningful, and the result is timedelta64 result = np.empty(other.shape, dtype="timedelta64[ns]") result.fill("NaT") return result diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index dfbb97ad9a762..19a9a934271ed 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1053,8 +1053,9 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): cdef: Py_ssize_t n = len(arr) Py_ssize_t increment = arr.strides[0] // 8 - ndarray[int64_t] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_INT64, - 0) + ndarray[int64_t] result = cnp.PyArray_EMPTY( + arr.ndim, arr.shape, cnp.NPY_INT64, 0 + ) _period_asfreq( cnp.PyArray_DATA(arr), @@ -2482,8 +2483,8 @@ class Period(_Period): the start or the end of the period, but rather the entire period itself. freq : str, default None One of pandas period strings or corresponding objects. Accepted - strings are listed in the :ref:`offset alias section - ` in the user docs. + strings are listed in the + :ref:`offset alias section ` in the user docs. ordinal : int, default None The period offset from the proleptic Gregorian epoch. year : int, default None diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 9a1bc2f289057..eaa8a71a4ef87 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1574,8 +1574,6 @@ class Timedelta(_Timedelta): "milliseconds", "microseconds", "nanoseconds"} def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta - if value is _no_input: if not len(kwargs): raise ValueError("cannot construct a Timedelta without a " @@ -1717,7 +1715,7 @@ class Timedelta(_Timedelta): @cython.cdivision(True) def _round(self, freq, mode): cdef: - int64_t result, unit, + int64_t result, unit ndarray[int64_t] arr from pandas._libs.tslibs.offsets import to_offset diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 1e31a0bf2e199..98d6103495837 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1524,9 +1524,11 @@ class Timestamp(_Timestamp): # GH#17690 tzinfo must be a datetime.tzinfo object, ensured # by the cython annotation. if tz is not None: - if (is_integer_object(tz) - and is_integer_object(ts_input) - and is_integer_object(freq)): + if ( + is_integer_object(tz) + and is_integer_object(ts_input) + and is_integer_object(freq) + ): # GH#31929 e.g. Timestamp(2019, 3, 4, 5, 6, tzinfo=foo) # TODO(GH#45307): this will still be fragile to # mixed-and-matched positional/keyword arguments diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index edb1f46b27325..4455e825aee1b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -901,7 +901,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, bint err = False, is_monotonic_increasing_bounds int midpoint, ret = 0 int64_t nobs = 0, N = len(start), s, e, win - float64_t val, res, + float64_t val, res skiplist_t *sl ndarray[float64_t] output @@ -1300,7 +1300,7 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start, derived from roll_quantile """ cdef: - Py_ssize_t i, j, s, e, N = len(start), + Py_ssize_t i, j, s, e, N = len(start) float64_t rank_min = 0, rank = 0 int64_t nobs = 0, win float64_t val From 23bb33607d504310ba6314f0d4db6a5da11edffa Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 1 Nov 2022 11:28:19 +0000 Subject: [PATCH 06/17] fixup --- pandas/_libs/lib.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 658b810e78187..cfca8244c0efe 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -767,7 +767,7 @@ def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, Int64 (datetime64) version of generic python version in ``groupby.py``. """ cdef: - Py_ssize_t lenidx, lenbin, i, j, bc, + Py_ssize_t lenidx, lenbin, i, j, bc ndarray[int64_t, ndim=1] bins int64_t, r_bin, nat_count bint right_closed = closed == 'right' @@ -2310,6 +2310,7 @@ def maybe_convert_numeric( seen.float_ = True else: try: + floatify(val, &fval, &maybe_int) if fval in na_values: seen.saw_null() floats[i] = complexes[i] = NaN @@ -2447,7 +2448,7 @@ def maybe_convert_objects(ndarray[object] objects, int64_t[::1] itimedeltas Seen seen = Seen() object val - float64_t, fnan = np.nan + float64_t fnan = np.nan n = len(objects) From 825f93d34585c711dc9ac4d4d72c5dee17449a74 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 1 Nov 2022 11:29:44 +0000 Subject: [PATCH 07/17] pyupgrade --- pandas/io/clipboard/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 85fae6da07827..78289174b7e68 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -282,7 +282,7 @@ def copy_dev_clipboard(text): stacklevel=find_stack_level(), ) - with open("/dev/clipboard", "wt") as fd: + with open("/dev/clipboard", "w") as fd: fd.write(text) def paste_dev_clipboard() -> str: From e701c5143ce2289dc4d7c6d92b98874bd9f2de16 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 1 Nov 2022 11:34:41 +0000 Subject: [PATCH 08/17] clean up --- pandas/_libs/tslibs/offsets.pyx | 15 +++++++++------ pandas/_libs/tslibs/parsing.pyx | 5 +++-- pandas/_libs/tslibs/timestamps.pyx | 7 ++++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index d6abeacb6f878..7c3dc87dee854 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2549,8 +2549,9 @@ cdef class SemiMonthOffset(SingleConstructorOffset): ndarray i8other = dtarr.view("i8") Py_ssize_t i, count = dtarr.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, - 0) + ndarray out = cnp.PyArray_EMPTY( + i8other.ndim, i8other.shape, cnp.NPY_INT64, 0 + ) npy_datetimestruct dts int months, to_day, nadj, n = self.n int days_in_month, day, anchor_dom = self.day_of_month @@ -2758,8 +2759,9 @@ cdef class Week(SingleConstructorOffset): cdef: Py_ssize_t i, count = i8other.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, - 0) + ndarray out = cnp.PyArray_EMPTY( + i8other.ndim, i8other.shape, cnp.NPY_INT64, 0 + ) npy_datetimestruct dts int wday, days, weeks, n = self.n int anchor_weekday = self.weekday @@ -4149,8 +4151,9 @@ cdef ndarray _shift_bdays( """ cdef: Py_ssize_t i, n = i8other.size - ndarray result = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, - 0) + ndarray result = cnp.PyArray_EMPTY( + i8other.ndim, i8other.shape, cnp.NPY_INT64, 0 + ) int64_t val, res_val int wday, nadj, days npy_datetimestruct dts diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ed65d36b01cd5..6f5b1e5b4e799 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -418,8 +418,9 @@ cdef parse_datetime_string_with_reso( from pandas import Timestamp parsed = Timestamp(date_string) else: - parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, - dts.us) + parsed = datetime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us + ) reso = { NPY_DATETIMEUNIT.NPY_FR_Y: "year", NPY_DATETIMEUNIT.NPY_FR_M: "month", diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 98d6103495837..4ad959c260c1a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -365,9 +365,10 @@ cdef class _Timestamp(ABCTimestamp): return False elif op == Py_NE: return True - raise TypeError("Cannot compare Timestamp with datetime.date. " - "Use ts == pd.Timestamp(date) or ts.date() == date instead." - ) + raise TypeError( + "Cannot compare Timestamp with datetime.date. " + "Use ts == pd.Timestamp(date) or ts.date() == date instead." + ) else: return NotImplemented From 1b1c9ab0cbf0729dd2f5b02a19960084d61c3685 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 1 Nov 2022 14:08:51 +0000 Subject: [PATCH 09/17] fixup use_time in vectorized.pyx --- pandas/_libs/tslibs/vectorized.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 75c17d9ad5e1f..a86ee7b4d6cda 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -156,7 +156,7 @@ def ints_to_pydatetime( use_ts = True elif box == "datetime": use_pydt = True - else: + elif not use_time: raise ValueError( "box must be one of 'datetime', 'date', 'time' or 'timestamp'" ) From d8fc8ad3b5de457c9b4f537e2d24511620a06d2d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 1 Nov 2022 14:14:16 +0000 Subject: [PATCH 10/17] fixup use_time in vectorized.pyx --- pandas/_libs/tslibs/vectorized.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index a86ee7b4d6cda..c828a9dfe0ccb 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -156,7 +156,7 @@ def ints_to_pydatetime( use_ts = True elif box == "datetime": use_pydt = True - elif not use_time: + elif box != "time": raise ValueError( "box must be one of 'datetime', 'date', 'time' or 'timestamp'" ) From 437efbfb6dc8cf26c1ea356de52c8ac0b43797ed Mon Sep 17 00:00:00 2001 From: ram vikram singh Date: Wed, 2 Nov 2022 10:29:43 +0530 Subject: [PATCH 11/17] changes --- pandas/_libs/internals.pyx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 222e2088dd4ab..9406ff84cd4ec 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -677,7 +677,12 @@ cdef class BlockManager: public list refs public object parent - def __cinit__(self, blocks=None, axes=None, refs=None, parent=None, verify_integrity=True): + def __cinit__(self, + blocks=None, + axes=None, + refs=None, + parent=None, + verify_integrity=True): # None as defaults for unpickling GH#42345 if blocks is None: # This adds 1-2 microseconds to DataFrame(np.array([])) From 787b2275d2c32e1c94680b887925d0b238c662c4 Mon Sep 17 00:00:00 2001 From: ram vikram singh Date: Wed, 2 Nov 2022 13:58:52 +0530 Subject: [PATCH 12/17] up1 --- pandas/_libs/internals.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 9406ff84cd4ec..c52349bdbb5c2 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -677,12 +677,12 @@ cdef class BlockManager: public list refs public object parent - def __cinit__(self, - blocks=None, - axes=None, - refs=None, - parent=None, - verify_integrity=True): + def __cinit__(self, + blocks=None, + axes=None, + refs=None, + parent=None, + verify_integrity=True): # None as defaults for unpickling GH#42345 if blocks is None: # This adds 1-2 microseconds to DataFrame(np.array([])) From ad30abb991e864567d2c46aac816988fcbc680f4 Mon Sep 17 00:00:00 2001 From: ram vikram singh Date: Thu, 3 Nov 2022 03:57:59 +0530 Subject: [PATCH 13/17] up2 --- pandas/_libs/tslibs/timestamps.pyx | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4a9380121895c..03967ee262aa7 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1526,30 +1526,6 @@ class Timestamp(_Timestamp): # by the cython annotation. if tz is not None: - if ( - is_integer_object(tz) - and is_integer_object(ts_input) - and is_integer_object(freq) - ): - # GH#31929 e.g. Timestamp(2019, 3, 4, 5, 6, tzinfo=foo) - # TODO(GH#45307): this will still be fragile to - # mixed-and-matched positional/keyword arguments - ts_input = datetime( - ts_input, - freq, - tz, - unit or 0, - year or 0, - month or 0, - day or 0, - fold=fold or 0, - ) - nanosecond = hour - tz = tzinfo - return cls(ts_input, nanosecond=nanosecond, tz=tz) - - - raise ValueError('Can provide at most one of tz, tzinfo') # User passed tzinfo instead of tz; avoid silently ignoring From fa955103f7c5856c4e89a01c2e9ce97716ff068b Mon Sep 17 00:00:00 2001 From: ram vikram singh Date: Thu, 3 Nov 2022 03:58:45 +0530 Subject: [PATCH 14/17] up3 --- pandas/io/sas/sas.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 668f054ccf813..8c13566c656b7 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -375,8 +375,6 @@ cdef class Parser: def read(self, int nrows): cdef: bint done - - Py_ssize_t i for i in range(nrows): From 7066eaacdf2c25a402bdddcc47face9f0b75a99d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 3 Nov 2022 15:50:21 +0000 Subject: [PATCH 15/17] remove duplicated line, split acc. to black formatter --- pandas/_libs/groupby.pyx | 27 ++++++++++++++++++--------- pandas/_libs/internals.pyx | 14 ++++++++------ pandas/_libs/lib.pyx | 6 ++++-- pandas/_libs/testing.pyx | 10 ++++++---- pandas/_libs/tslibs/fields.pyx | 2 +- pandas/_libs/tslibs/np_datetime.pyx | 7 +++++-- pandas/_libs/tslibs/offsets.pyx | 13 +++++++++---- pandas/_libs/tslibs/period.pyx | 5 +++-- pandas/_libs/tslibs/timedeltas.pyx | 5 +++-- pandas/_libs/tslibs/timestamps.pyx | 5 +++-- pandas/_libs/window/aggregations.pyx | 10 ++++++---- 11 files changed, 66 insertions(+), 38 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index afe0eb430b37c..cf193de658dc6 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -743,8 +743,11 @@ def group_sum( # is otherwise the same as in _treat_as_na if uses_mask: isna_entry = mask[i, j] - elif (sum_t is float32_t or sum_t is float64_t - or sum_t is complex64_t or sum_t is complex64_t): + elif ( + sum_t is float32_t + or sum_t is float64_t + or sum_t is complex64_t + ): # avoid warnings because of equality comparison isna_entry = not val == val elif sum_t is int64_t and is_datetimelike and val == NPY_NAT: @@ -774,7 +777,6 @@ def group_sum( sum_t is float32_t or sum_t is float64_t or sum_t is complex64_t - or sum_t is complex64_t ): out[i, j] = NAN elif sum_t is int64_t: @@ -1244,8 +1246,11 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: return False -cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, - bint is_datetimelike): +cdef numeric_object_t _get_min_or_max( + numeric_object_t val, + bint compute_max, + bint is_datetimelike, +): """ Find either the min or the max supported by numeric_object_t; 'val' is a placeholder to effectively make numeric_object_t an argument. @@ -1371,8 +1376,10 @@ def group_last( # set a placeholder value in out[i, j]. if uses_mask: result_mask[i, j] = True - elif (numeric_object_t is float32_t or - numeric_object_t is float64_t): + elif ( + numeric_object_t is float32_t + or numeric_object_t is float64_t + ): out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in @@ -1492,8 +1499,10 @@ def group_nth( # it was initialized with np.empty. Also ensures # we can downcast out if appropriate. out[i, j] = 0 - elif (numeric_object_t is float32_t or - numeric_object_t is float64_t): + elif ( + numeric_object_t is float32_t + or numeric_object_t is float64_t + ): out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index c52349bdbb5c2..43e33ef3e7d7e 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -677,12 +677,14 @@ cdef class BlockManager: public list refs public object parent - def __cinit__(self, - blocks=None, - axes=None, - refs=None, - parent=None, - verify_integrity=True): + def __cinit__( + self, + blocks=None, + axes=None, + refs=None, + parent=None, + verify_integrity=True, + ): # None as defaults for unpickling GH#42345 if blocks is None: # This adds 1-2 microseconds to DataFrame(np.array([])) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index fbf73ffcea79c..9b7c07d29261f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2314,6 +2314,7 @@ def maybe_convert_numeric( else: try: floatify(val, &fval, &maybe_int) + if fval in na_values: seen.saw_null() floats[i] = complexes[i] = NaN @@ -3064,8 +3065,9 @@ cpdef ndarray eq_NA_compat(ndarray[object] arr, object key): key is assumed to have `not isna(key)` """ cdef: - ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, - cnp.NPY_BOOL, 0) + ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY( + arr.ndim, arr.shape, cnp.NPY_BOOL, 0 + ) Py_ssize_t i object item diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index dbfb8a96c495f..b7457f94f3447 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -161,15 +161,17 @@ cpdef assert_almost_equal(a, b, is_unequal = True diff += 1 if not first_diff: - first_diff = (f"At positional index {i}, " - f"first diff: {a[i]} != {b[i]}") + first_diff = ( + f"At positional index {i}, first diff: {a[i]} != {b[i]}" + ) if is_unequal: from pandas._testing import raise_assert_detail msg = (f"{obj} values are different " f"({np.round(diff * 100.0 / na, 5)} %)") - raise_assert_detail(obj, msg, lobj, robj, first_diff=first_diff, - index_values=index_values) + raise_assert_detail( + obj, msg, lobj, robj, first_diff=first_diff, index_values=index_values + ) return True diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index e14c39f5e155a..dda26ad3bebc6 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -328,7 +328,7 @@ def get_start_end_field( def get_date_field( const int64_t[:] dtindex, str field, - NPY_DATETIMEUNIT reso=NPY_FR_ns + NPY_DATETIMEUNIT reso=NPY_FR_ns, ): """ Given a int64-based datetime index, extract the year, month, etc., diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 9a13b3ef255ad..b1ff456c84a70 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -171,8 +171,11 @@ class OutOfBoundsTimedelta(ValueError): pass -cdef get_implementation_bounds(NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, - npy_datetimestruct *upper): +cdef get_implementation_bounds( + NPY_DATETIMEUNIT reso, + npy_datetimestruct *lower, + npy_datetimestruct *upper, +): if reso == NPY_FR_ns: upper[0] = _NS_MAX_DTS lower[0] = _NS_MIN_DTS diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index ece51d3d3b33f..50d6a0a02b0cf 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2262,8 +2262,12 @@ cdef class QuarterOffset(SingleConstructorOffset): def _apply_array(self, dtarr): reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_quarters( - dtarr.view("i8"), self.n, self.startingMonth, self._day_opt, modby=3, - reso=reso + dtarr.view("i8"), + self.n, + self.startingMonth, + self._day_opt, + modby=3, + reso=reso, ) return shifted @@ -3327,8 +3331,9 @@ cdef class FY5253Quarter(FY5253Mixin): for qlen in qtr_lens: if qlen * 7 <= tdelta.days: num_qtrs += 1 - tdelta -= (<_Timedelta>Timedelta(days=qlen * 7))._as_creso( - norm._creso) + tdelta -= ( + <_Timedelta>Timedelta(days=qlen * 7) + )._as_creso(norm._creso) else: break else: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 19a9a934271ed..0e7cfa4dd9670 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1439,8 +1439,9 @@ def extract_ordinals(ndarray values, freq) -> np.ndarray: cdef: Py_ssize_t i, n = values.size int64_t ordinal - ndarray ordinals = cnp.PyArray_EMPTY(values.ndim, values.shape, - cnp.NPY_INT64, 0) + ndarray ordinals = cnp.PyArray_EMPTY( + values.ndim, values.shape, cnp.NPY_INT64, 0 + ) cnp.broadcast mi = cnp.PyArray_MultiIterNew2(ordinals, values) object p diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index eaa8a71a4ef87..a96ec8c2ab80a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -176,8 +176,9 @@ def ints_to_pytimedelta(ndarray m8values, box=False): # `it` iterates C-order as well, so the iteration matches # See discussion at # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 - ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, - cnp.NPY_OBJECT, 0) + ndarray result = cnp.PyArray_EMPTY( + m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0 + ) object[::1] res_flat = result.ravel() # should NOT be a copy ndarray arr = m8values.view("i8") diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 03967ee262aa7..ab0484f15b7b4 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1645,8 +1645,9 @@ class Timestamp(_Timestamp): if not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, - ts.creso) + return create_timestamp_from_ts( + ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.creso + ) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): cdef: diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 4455e825aee1b..702706f00455b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -172,8 +172,9 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start, add_sum(values[j], &nobs, &sum_x, &compensation_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, - prev_value) + output[i] = calc_sum( + minp, nobs, sum_x, num_consecutive_same_value, prev_value + ) if not is_monotonic_increasing_bounds: nobs = 0 @@ -303,8 +304,9 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, - prev_value) + output[i] = calc_mean( + minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value + ) if not is_monotonic_increasing_bounds: nobs = 0 From d631b56d33f01e5cd6f32adc70364df06c494c2b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 4 Nov 2022 08:21:18 +0000 Subject: [PATCH 16/17] remove added newline --- pandas/_libs/tslibs/timestamps.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ab0484f15b7b4..afb93e34935f0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1525,7 +1525,6 @@ class Timestamp(_Timestamp): # GH#17690 tzinfo must be a datetime.tzinfo object, ensured # by the cython annotation. if tz is not None: - raise ValueError('Can provide at most one of tz, tzinfo') # User passed tzinfo instead of tz; avoid silently ignoring From 1b830694710fef4e38cc1801dd676a2907d2d68f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 4 Nov 2022 08:49:45 +0000 Subject: [PATCH 17/17] remove misplaced commas --- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/join.pyx | 2 +- pandas/_libs/lib.pyx | 2 +- pandas/_libs/tslibs/tzconversion.pyx | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index cf193de658dc6..a351ad6e461f3 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -356,7 +356,7 @@ def group_cumsum( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, + Py_ssize_t i, j, N, K int64float_t val, y, t, na_val int64float_t[:, ::1] accum, compensation uint8_t[:, ::1] accum_mask diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 1f2d717cab88c..667eda1b1f1da 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -275,7 +275,7 @@ def left_join_indexer_unique( cdef: Py_ssize_t i, j, nleft, nright ndarray[intp_t] indexer - numeric_object_t, rval + numeric_object_t rval i = 0 j = 0 diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 9b7c07d29261f..1b871bf0b745f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -769,7 +769,7 @@ def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, cdef: Py_ssize_t lenidx, lenbin, i, j, bc ndarray[int64_t, ndim=1] bins - int64_t, r_bin, nat_count + int64_t r_bin, nat_count bint right_closed = closed == 'right' nat_count = 0 diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 9fea860500b2e..28259c9db26e5 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -225,7 +225,7 @@ timedelta-like} cdef: ndarray[uint8_t, cast=True] ambiguous_array Py_ssize_t i, n = vals.shape[0] - Py_ssize_t delta_idx_offset, delta_idx, + Py_ssize_t delta_idx_offset, delta_idx int64_t v, left, right, val, new_local, remaining_mins int64_t first_delta, delta int64_t shift_delta = 0