diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ff7526b87521..1893f57fc09c1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,16 +18,16 @@ repos: pass_filenames: true require_serial: false - repo: https://github.com/python/black - rev: 22.8.0 + rev: 22.10.0 hooks: - id: black - repo: https://github.com/codespell-project/codespell - rev: v2.2.1 + rev: v2.2.2 hooks: - id: codespell types_or: [python, rst, markdown] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.1.8 + rev: v0.2.1 hooks: - id: cython-lint - repo: https://github.com/pre-commit/pre-commit-hooks @@ -60,7 +60,7 @@ repos: - flake8-bugbear==22.7.1 - pandas-dev-flaker==0.5.0 - repo: https://github.com/pycqa/pylint - rev: v2.15.3 + rev: v2.15.5 hooks: - id: pylint - repo: https://github.com/PyCQA/isort @@ -68,7 +68,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v2.38.2 + rev: v3.2.0 hooks: - id: pyupgrade args: [--py38-plus] @@ -83,7 +83,7 @@ repos: types: [text] # overwrite types: [rst] types_or: [python, rst] - repo: https://github.com/sphinx-contrib/sphinx-lint - rev: v0.6.1 + rev: v0.6.7 hooks: - id: sphinx-lint - repo: https://github.com/asottile/yesqa diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 96c47471aaf90..7b9fe6422544c 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -81,26 +81,48 @@ class Infinity: """ Provide a positive Infinity comparison method for ranking. """ - __lt__ = lambda self, other: False - __le__ = lambda self, other: isinstance(other, Infinity) - __eq__ = lambda self, other: isinstance(other, Infinity) - __ne__ = lambda self, other: not isinstance(other, Infinity) - __gt__ = lambda self, other: (not isinstance(other, Infinity) and - not missing.checknull(other)) - __ge__ = lambda self, other: not missing.checknull(other) + def __lt__(self, other): + return False + + def __le__(self, other): + return isinstance(other, Infinity) + + def __eq__(self, other): + return isinstance(other, Infinity) + + def __ne__(self, other): + return not isinstance(other, Infinity) + + def __gt__(self, other): + return (not isinstance(other, Infinity) and + not missing.checknull(other)) + + def __ge__(self, other): + return not missing.checknull(other) class NegInfinity: """ Provide a negative Infinity comparison method for ranking. """ - __lt__ = lambda self, other: (not isinstance(other, NegInfinity) and - not missing.checknull(other)) - __le__ = lambda self, other: not missing.checknull(other) - __eq__ = lambda self, other: isinstance(other, NegInfinity) - __ne__ = lambda self, other: not isinstance(other, NegInfinity) - __gt__ = lambda self, other: False - __ge__ = lambda self, other: isinstance(other, NegInfinity) + def __lt__(self, other): + return (not isinstance(other, NegInfinity) and + not missing.checknull(other)) + + def __le__(self, other): + return not missing.checknull(other) + + def __eq__(self, other): + return isinstance(other, NegInfinity) + + def __ne__(self, other): + return not isinstance(other, NegInfinity) + + def __gt__(self, other): + return False + + def __ge__(self, other): + return isinstance(other, NegInfinity) @cython.wraparound(False) @@ -321,7 +343,7 @@ def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t: @cython.cdivision(True) def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): cdef: - Py_ssize_t i, j, xi, yi, N, K + Py_ssize_t i, xi, yi, N, K bint minpv float64_t[:, ::1] result ndarray[uint8_t, ndim=2] mask @@ -377,7 +399,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): @cython.wraparound(False) def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray: cdef: - Py_ssize_t i, j, xi, yi, N, K + Py_ssize_t i, xi, yi, N, K ndarray[float64_t, ndim=2] result ndarray[float64_t, ndim=2] ranked_mat ndarray[float64_t, ndim=1] rankedx, rankedy @@ -746,7 +768,8 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike): n = len(arr) if n == 1: - if arr[0] != arr[0] or (numeric_object_t is int64_t and timelike and arr[0] == NPY_NAT): + if arr[0] != arr[0] or (numeric_object_t is int64_t and timelike and + arr[0] == NPY_NAT): # single value is NaN return False, False, True else: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index f798655e9d922..a351ad6e461f3 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -265,7 +265,7 @@ def group_cumprod( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, size + Py_ssize_t i, j, N, K int64float_t val, na_val int64float_t[:, ::1] accum intp_t lab @@ -356,7 +356,7 @@ def group_cumsum( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, size + Py_ssize_t i, j, N, K int64float_t val, y, t, na_val int64float_t[:, ::1] accum, compensation uint8_t[:, ::1] accum_mask @@ -441,7 +441,7 @@ def group_shift_indexer( int periods, ) -> None: cdef: - Py_ssize_t N, i, j, ii, lab + Py_ssize_t N, i, ii, lab int offset = 0, sign int64_t idxer, idxer_slot int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64) @@ -743,8 +743,11 @@ def group_sum( # is otherwise the same as in _treat_as_na if uses_mask: isna_entry = mask[i, j] - elif (sum_t is float32_t or sum_t is float64_t - or sum_t is complex64_t or sum_t is complex64_t): + elif ( + sum_t is float32_t + or sum_t is float64_t + or sum_t is complex64_t + ): # avoid warnings because of equality comparison isna_entry = not val == val elif sum_t is int64_t and is_datetimelike and val == NPY_NAT: @@ -770,8 +773,11 @@ def group_sum( # set a placeholder value in out[i, j]. if uses_mask: result_mask[i, j] = True - elif (sum_t is float32_t or sum_t is float64_t - or sum_t is complex64_t or sum_t is complex64_t): + elif ( + sum_t is float32_t + or sum_t is float64_t + or sum_t is complex64_t + ): out[i, j] = NAN elif sum_t is int64_t: out[i, j] = NPY_NAT @@ -799,7 +805,7 @@ def group_prod( """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - int64float_t val, count + int64float_t val int64float_t[:, ::1] prodx int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) @@ -872,7 +878,7 @@ def group_var( floating[:, ::1] mean int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = not mask is None + bint isna_entry, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -969,7 +975,7 @@ def group_mean( mean_t[:, ::1] sumx, compensation int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = not mask is None + bint isna_entry, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -1042,10 +1048,10 @@ def group_ohlc( Only aggregates on axis=0 """ cdef: - Py_ssize_t i, j, N, K, lab + Py_ssize_t i, N, K, lab int64float_t val uint8_t[::1] first_element_set - bint isna_entry, uses_mask = not mask is None + bint isna_entry, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -1240,7 +1246,11 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: return False -cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike): +cdef numeric_object_t _get_min_or_max( + numeric_object_t val, + bint compute_max, + bint is_datetimelike, +): """ Find either the min or the max supported by numeric_object_t; 'val' is a placeholder to effectively make numeric_object_t an argument. @@ -1366,7 +1376,10 @@ def group_last( # set a placeholder value in out[i, j]. if uses_mask: result_mask[i, j] = True - elif numeric_object_t is float32_t or numeric_object_t is float64_t: + elif ( + numeric_object_t is float32_t + or numeric_object_t is float64_t + ): out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in @@ -1486,7 +1499,10 @@ def group_nth( # it was initialized with np.empty. Also ensures # we can downcast out if appropriate. out[i, j] = 0 - elif numeric_object_t is float32_t or numeric_object_t is float64_t: + elif ( + numeric_object_t is float32_t + or numeric_object_t is float64_t + ): out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 7f0f91652ae0d..43e33ef3e7d7e 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -133,7 +133,7 @@ cdef class BlockPlacement: @property def as_array(self) -> np.ndarray: cdef: - Py_ssize_t start, stop, end, _ + Py_ssize_t start, stop, _ if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) @@ -259,7 +259,6 @@ cdef class BlockPlacement: """ cdef: slice slc = self._ensure_has_slice() - slice new_slice ndarray[intp_t, ndim=1] new_placement if slc is not None and slc.step == 1: @@ -678,7 +677,14 @@ cdef class BlockManager: public list refs public object parent - def __cinit__(self, blocks=None, axes=None, refs=None, parent=None, verify_integrity=True): + def __cinit__( + self, + blocks=None, + axes=None, + refs=None, + parent=None, + verify_integrity=True, + ): # None as defaults for unpickling GH#42345 if blocks is None: # This adds 1-2 microseconds to DataFrame(np.array([])) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index e574aa10f6b57..667eda1b1f1da 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -275,7 +275,7 @@ def left_join_indexer_unique( cdef: Py_ssize_t i, j, nleft, nright ndarray[intp_t] indexer - numeric_object_t lval, rval + numeric_object_t rval i = 0 j = 0 @@ -324,7 +324,7 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] is non-unique (if both were unique we'd use left_join_indexer_unique). """ cdef: - Py_ssize_t i, j, k, nright, nleft, count + Py_ssize_t i, j, nright, nleft, count numeric_object_t lval, rval ndarray[intp_t] lindexer, rindexer ndarray[numeric_object_t] result @@ -434,7 +434,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] Both left and right are monotonic increasing but not necessarily unique. """ cdef: - Py_ssize_t i, j, k, nright, nleft, count + Py_ssize_t i, j, nright, nleft, count numeric_object_t lval, rval ndarray[intp_t] lindexer, rindexer ndarray[numeric_object_t] result diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 56fdbfccacc55..1b871bf0b745f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -621,6 +621,8 @@ ctypedef fused ndarr_object: # TODO: get rid of this in StringArray and modify # and go through ensure_string_array instead + + @cython.wraparound(False) @cython.boundscheck(False) def convert_nans_to_NA(ndarr_object arr) -> ndarray: @@ -765,9 +767,9 @@ def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, Int64 (datetime64) version of generic python version in ``groupby.py``. """ cdef: - Py_ssize_t lenidx, lenbin, i, j, bc, vc + Py_ssize_t lenidx, lenbin, i, j, bc ndarray[int64_t, ndim=1] bins - int64_t l_bin, r_bin, nat_count + int64_t r_bin, nat_count bint right_closed = closed == 'right' nat_count = 0 @@ -2215,14 +2217,24 @@ def maybe_convert_numeric( # Otherwise, iterate and do full inference. cdef: - int status, maybe_int + int maybe_int Py_ssize_t i, n = values.size Seen seen = Seen(coerce_numeric) - ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_FLOAT64, 0) - ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_COMPLEX128, 0) - ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_INT64, 0) - ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT64, 0) - ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT8, 0) + ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_FLOAT64, 0 + ) + ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_COMPLEX128, 0 + ) + ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_INT64, 0 + ) + ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_UINT64, 0 + ) + ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_UINT8, 0 + ) ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") float64_t fval bint allow_null_in_int = convert_to_masked_nullable @@ -2301,7 +2313,7 @@ def maybe_convert_numeric( seen.float_ = True else: try: - status = floatify(val, &fval, &maybe_int) + floatify(val, &fval, &maybe_int) if fval in na_values: seen.saw_null() @@ -2440,7 +2452,7 @@ def maybe_convert_objects(ndarray[object] objects, int64_t[::1] itimedeltas Seen seen = Seen() object val - float64_t fval, fnan = np.nan + float64_t fnan = np.nan n = len(objects) @@ -2925,7 +2937,7 @@ def to_object_array(rows: object, min_width: int = 0) -> ndarray: def tuples_to_object_array(ndarray[object] tuples): cdef: - Py_ssize_t i, j, n, k, tmp + Py_ssize_t i, j, n, k ndarray[object, ndim=2] result tuple tup @@ -3053,7 +3065,9 @@ cpdef ndarray eq_NA_compat(ndarray[object] arr, object key): key is assumed to have `not isna(key)` """ cdef: - ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_BOOL, 0) + ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY( + arr.ndim, arr.shape, cnp.NPY_BOOL, 0 + ) Py_ssize_t i object item diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 679cde9932a7a..b7457f94f3447 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -161,13 +161,17 @@ cpdef assert_almost_equal(a, b, is_unequal = True diff += 1 if not first_diff: - first_diff = f"At positional index {i}, first diff: {a[i]} != {b[i]}" + first_diff = ( + f"At positional index {i}, first diff: {a[i]} != {b[i]}" + ) if is_unequal: from pandas._testing import raise_assert_detail msg = (f"{obj} values are different " f"({np.round(diff * 100.0 / na, 5)} %)") - raise_assert_detail(obj, msg, lobj, robj, first_diff=first_diff, index_values=index_values) + raise_assert_detail( + obj, msg, lobj, robj, first_diff=first_diff, index_values=index_values + ) return True diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index bf912005ae57e..6d6e90673f030 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -261,7 +261,7 @@ def array_with_unit_to_datetime( tz : parsed timezone offset or None """ cdef: - Py_ssize_t i, j, n=len(values) + Py_ssize_t i, n=len(values) int64_t mult int prec = 0 ndarray[float64_t] fvalues @@ -418,6 +418,7 @@ def array_with_unit_to_datetime( return oresult, tz + @cython.wraparound(False) @cython.boundscheck(False) def first_non_null(values: ndarray) -> int: @@ -425,7 +426,6 @@ def first_non_null(values: ndarray) -> int: cdef: Py_ssize_t n = len(values) Py_ssize_t i - int result for i in range(n): val = values[i] if checknull_with_nat_and_na(val): @@ -436,6 +436,7 @@ def first_non_null(values: ndarray) -> int: else: return -1 + @cython.wraparound(False) @cython.boundscheck(False) cpdef array_to_datetime( @@ -610,7 +611,8 @@ cpdef array_to_datetime( continue elif is_raise: raise ValueError( - f"time data \"{val}\" at position {i} doesn't match format specified" + f"time data \"{val}\" at position {i} doesn't " + "match format specified" ) return values, tz_out @@ -626,7 +628,10 @@ cpdef array_to_datetime( if is_coerce: iresult[i] = NPY_NAT continue - raise TypeError(f"invalid string coercion to datetime for \"{val}\" at position {i}") + raise TypeError( + f"invalid string coercion to datetime for \"{val}\" " + f"at position {i}" + ) if tz is not None: seen_datetime_offset = True diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 94781374296fa..357227de2fc2c 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -396,7 +396,9 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil: # TODO: use in _matplotlib.converter? -cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1: +cpdef int64_t periods_per_day( + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns +) except? -1: """ How many of the given time units fit into a single day? """ diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 3c7406d231241..dda26ad3bebc6 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -325,7 +325,11 @@ def get_start_end_field( @cython.wraparound(False) @cython.boundscheck(False) -def get_date_field(const int64_t[:] dtindex, str field, NPY_DATETIMEUNIT reso=NPY_FR_ns): +def get_date_field( + const int64_t[:] dtindex, + str field, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): """ Given a int64-based datetime index, extract the year, month, etc., field and return an array of these values. diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 79299ec38e19c..26cd332c3007a 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -204,9 +204,10 @@ cdef class _NaT(datetime): return result # __rsub__ logic here - # TODO(cython3): remove this, move above code out of ``if not is_rsub`` block + # TODO(cython3): remove this, move above code out of + # ``if not is_rsub`` block # timedelta64 - NaT we have to treat NaT as timedelta64 - # for this to be meaningful, and the result is timedelta64 + # for this to be meaningful, and the result is timedelta64 result = np.empty(other.shape, dtype="timedelta64[ns]") result.fill("NaT") return result @@ -240,7 +241,8 @@ cdef class _NaT(datetime): result = np.empty(other.shape, dtype="timedelta64[ns]") result.fill("NaT") return result - # other cases are same, swap operands is allowed even though we subtract because this is NaT + # other cases are same, swap operands is allowed even though we subtract + # because this is NaT return self.__sub__(other) def __pos__(self): @@ -1201,6 +1203,7 @@ default 'raise' NaT """, ) + @property def tz(self) -> None: return None diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 07872050dc822..b1ff456c84a70 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -46,7 +46,7 @@ cdef extern from "src/datetime/np_datetime.h": npy_datetimestruct _S_MIN_DTS, _S_MAX_DTS npy_datetimestruct _M_MIN_DTS, _M_MAX_DTS - PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype); + PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype) cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(const char *str, int len, int want_exc, @@ -171,7 +171,11 @@ class OutOfBoundsTimedelta(ValueError): pass -cdef get_implementation_bounds(NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper): +cdef get_implementation_bounds( + NPY_DATETIMEUNIT reso, + npy_datetimestruct *lower, + npy_datetimestruct *upper, +): if reso == NPY_FR_ns: upper[0] = _NS_MAX_DTS lower[0] = _NS_MIN_DTS @@ -420,7 +424,6 @@ def compare_mismatched_resolutions(ndarray left, ndarray right, op): Py_ssize_t i, N = left.size npy_datetimestruct ldts, rdts - for i in range(N): # Analogous to: lval = lvalues[i] lval = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] @@ -511,7 +514,10 @@ cdef ndarray astype_round_check( @cython.overflowcheck(True) -cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1: +cdef int64_t get_conversion_factor( + NPY_DATETIMEUNIT from_unit, + NPY_DATETIMEUNIT to_unit +) except? -1: """ Find the factor by which we need to multiply to convert from from_unit to to_unit. """ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index bbb17d8a2bbcf..50d6a0a02b0cf 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2262,7 +2262,12 @@ cdef class QuarterOffset(SingleConstructorOffset): def _apply_array(self, dtarr): reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_quarters( - dtarr.view("i8"), self.n, self.startingMonth, self._day_opt, modby=3, reso=reso + dtarr.view("i8"), + self.n, + self.startingMonth, + self._day_opt, + modby=3, + reso=reso, ) return shifted @@ -2542,7 +2547,9 @@ cdef class SemiMonthOffset(SingleConstructorOffset): ndarray i8other = dtarr.view("i8") Py_ssize_t i, count = dtarr.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + ndarray out = cnp.PyArray_EMPTY( + i8other.ndim, i8other.shape, cnp.NPY_INT64, 0 + ) npy_datetimestruct dts int months, to_day, nadj, n = self.n int days_in_month, day, anchor_dom = self.day_of_month @@ -2750,7 +2757,9 @@ cdef class Week(SingleConstructorOffset): cdef: Py_ssize_t i, count = i8other.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + ndarray out = cnp.PyArray_EMPTY( + i8other.ndim, i8other.shape, cnp.NPY_INT64, 0 + ) npy_datetimestruct dts int wday, days, weeks, n = self.n int anchor_weekday = self.weekday @@ -3322,7 +3331,9 @@ cdef class FY5253Quarter(FY5253Mixin): for qlen in qtr_lens: if qlen * 7 <= tdelta.days: num_qtrs += 1 - tdelta -= (<_Timedelta>Timedelta(days=qlen * 7))._as_creso(norm._creso) + tdelta -= ( + <_Timedelta>Timedelta(days=qlen * 7) + )._as_creso(norm._creso) else: break else: @@ -4139,7 +4150,9 @@ cdef ndarray _shift_bdays( """ cdef: Py_ssize_t i, n = i8other.size - ndarray result = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + ndarray result = cnp.PyArray_EMPTY( + i8other.ndim, i8other.shape, cnp.NPY_INT64, 0 + ) int64_t val, res_val int wday, nadj, days npy_datetimestruct dts diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 1312124cfb77b..6f5b1e5b4e799 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -418,7 +418,9 @@ cdef parse_datetime_string_with_reso( from pandas import Timestamp parsed = Timestamp(date_string) else: - parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us) + parsed = datetime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us + ) reso = { NPY_DATETIMEUNIT.NPY_FR_Y: "year", NPY_DATETIMEUNIT.NPY_FR_M: "month", @@ -717,7 +719,8 @@ def try_parse_dates( date = datetime.now() default = datetime(date.year, date.month, 1) - parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + def parse_date(x): + return du_parse(x, dayfirst=dayfirst, default=default) # EAFP here try: @@ -1050,6 +1053,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: else: return None + cdef str _fill_token(token: str, padding: int): cdef str token_filled if '.' not in token: @@ -1064,6 +1068,7 @@ cdef str _fill_token(token: str, padding: int): token_filled = f'{seconds}.{nanoseconds}' return token_filled + @cython.wraparound(False) @cython.boundscheck(False) cdef inline object convert_to_unicode(object item, bint keep_trivial_numbers): diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index be6f87791284e..0e7cfa4dd9670 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1053,7 +1053,9 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): cdef: Py_ssize_t n = len(arr) Py_ssize_t increment = arr.strides[0] // 8 - ndarray[int64_t] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_INT64, 0) + ndarray[int64_t] result = cnp.PyArray_EMPTY( + arr.ndim, arr.shape, cnp.NPY_INT64, 0 + ) _period_asfreq( cnp.PyArray_DATA(arr), @@ -1362,7 +1364,6 @@ def get_period_field_arr(str field, const int64_t[:] arr, int freq): cdef: Py_ssize_t i, sz int64_t[::1] out - accessor f func = _get_accessor_func(field) if func is NULL: @@ -1438,7 +1439,9 @@ def extract_ordinals(ndarray values, freq) -> np.ndarray: cdef: Py_ssize_t i, n = values.size int64_t ordinal - ndarray ordinals = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_INT64, 0) + ndarray ordinals = cnp.PyArray_EMPTY( + values.ndim, values.shape, cnp.NPY_INT64, 0 + ) cnp.broadcast mi = cnp.PyArray_MultiIterNew2(ordinals, values) object p @@ -1684,7 +1687,10 @@ cdef class _Period(PeriodMixin): raise IncompatibleFrequency("Input cannot be converted to " f"Period(freq={self.freqstr})") - if util.is_timedelta64_object(other) and get_timedelta64_value(other) == NPY_NAT: + if ( + util.is_timedelta64_object(other) and + get_timedelta64_value(other) == NPY_NAT + ): # i.e. np.timedelta64("nat") return NaT @@ -2478,7 +2484,8 @@ class Period(_Period): the start or the end of the period, but rather the entire period itself. freq : str, default None One of pandas period strings or corresponding objects. Accepted - strings are listed in the :ref:`offset alias section ` in the user docs. + strings are listed in the + :ref:`offset alias section ` in the user docs. ordinal : int, default None The period offset from the proleptic Gregorian epoch. year : int, default None @@ -2511,7 +2518,6 @@ class Period(_Period): # ('T', 5) but may be passed in as a string like '5T' # ordinal is the period offset from the gregorian proleptic epoch - cdef _Period self if freq is not None: freq = cls._maybe_convert_freq(freq) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 6287c2fbc5d34..f540ad19c48d2 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -75,7 +75,6 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai int iso_week, iso_year int64_t us, ns object val, group_key, ampm, found, timezone - dict found_key bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f3de67b705d4d..a96ec8c2ab80a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -176,7 +176,9 @@ def ints_to_pytimedelta(ndarray m8values, box=False): # `it` iterates C-order as well, so the iteration matches # See discussion at # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 - ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0) + ndarray result = cnp.PyArray_EMPTY( + m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0 + ) object[::1] res_flat = result.ravel() # should NOT be a copy ndarray arr = m8values.view("i8") @@ -468,7 +470,11 @@ cdef inline int64_t _item_to_timedelta64_fastpath(object item) except? -1: return parse_timedelta_string(item) -cdef inline int64_t _item_to_timedelta64(object item, str parsed_unit, str errors) except? -1: +cdef inline int64_t _item_to_timedelta64( + object item, + str parsed_unit, + str errors +) except? -1: """ See array_to_timedelta64. """ @@ -967,7 +973,6 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): "Only resolutions 's', 'ms', 'us', 'ns' are supported." ) - td_base.value = value td_base._is_populated = 0 td_base._creso = reso @@ -1570,8 +1575,6 @@ class Timedelta(_Timedelta): "milliseconds", "microseconds", "nanoseconds"} def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta td_base - if value is _no_input: if not len(kwargs): raise ValueError("cannot construct a Timedelta without a " @@ -1625,7 +1628,8 @@ class Timedelta(_Timedelta): if len(kwargs): # GH#48898 raise ValueError( - "Cannot pass both a Timedelta input and timedelta keyword arguments, got " + "Cannot pass both a Timedelta input and timedelta keyword " + "arguments, got " f"{list(kwargs.keys())}" ) return value @@ -1712,7 +1716,7 @@ class Timedelta(_Timedelta): @cython.cdivision(True) def _round(self, freq, mode): cdef: - int64_t result, unit, remainder + int64_t result, unit ndarray[int64_t] arr from pandas._libs.tslibs.offsets import to_offset @@ -1801,9 +1805,6 @@ class Timedelta(_Timedelta): __rmul__ = __mul__ def __truediv__(self, other): - cdef: - int64_t new_value - if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") other = Timedelta(other) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f80cb2e27cc23..afb93e34935f0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -267,7 +267,6 @@ cdef class _Timestamp(ABCTimestamp): @classmethod def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso, tzinfo tz): cdef: - npy_datetimestruct dts _TSObject obj = _TSObject() if value == NPY_NAT: @@ -294,7 +293,6 @@ cdef class _Timestamp(ABCTimestamp): # This is herely mainly so we can incrementally implement non-nano # (e.g. only tznaive at first) cdef: - npy_datetimestruct dts int64_t value NPY_DATETIMEUNIT reso @@ -317,7 +315,6 @@ cdef class _Timestamp(ABCTimestamp): def __richcmp__(_Timestamp self, object other, int op): cdef: _Timestamp ots - int ndim if isinstance(other, _Timestamp): ots = other @@ -368,7 +365,8 @@ cdef class _Timestamp(ABCTimestamp): return False elif op == Py_NE: return True - raise TypeError("Cannot compare Timestamp with datetime.date. " + raise TypeError( + "Cannot compare Timestamp with datetime.date. " "Use ts == pd.Timestamp(date) or ts.date() == date instead." ) else: @@ -1646,7 +1644,9 @@ class Timestamp(_Timestamp): if not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.creso) + return create_timestamp_from_ts( + ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.creso + ) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): cdef: diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index e2812178a2b43..28259c9db26e5 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -224,14 +224,13 @@ timedelta-like} """ cdef: ndarray[uint8_t, cast=True] ambiguous_array - Py_ssize_t i, idx, pos, n = vals.shape[0] - Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right + Py_ssize_t i, n = vals.shape[0] + Py_ssize_t delta_idx_offset, delta_idx int64_t v, left, right, val, new_local, remaining_mins int64_t first_delta, delta int64_t shift_delta = 0 ndarray[int64_t] result_a, result_b, dst_hours int64_t[::1] result - npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint shift_forward = False, shift_backward = False bint fill_nonexist = False diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 6a6b156af3dc4..c828a9dfe0ccb 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -138,7 +138,7 @@ def ints_to_pydatetime( npy_datetimestruct dts tzinfo new_tz - bint use_date = False, use_time = False, use_ts = False, use_pydt = False + bint use_date = False, use_ts = False, use_pydt = False object res_val # Note that `result` (and thus `result_flat`) is C-order and @@ -154,11 +154,9 @@ def ints_to_pydatetime( use_date = True elif box == "timestamp": use_ts = True - elif box == "time": - use_time = True elif box == "datetime": use_pydt = True - else: + elif box != "time": raise ValueError( "box must be one of 'datetime', 'date', 'time' or 'timestamp'" ) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 68c05f2bb2c98..702706f00455b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -172,7 +172,9 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start, add_sum(values[j], &nobs, &sum_x, &compensation_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value) + output[i] = calc_sum( + minp, nobs, sum_x, num_consecutive_same_value, prev_value + ) if not is_monotonic_increasing_bounds: nobs = 0 @@ -209,9 +211,15 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct return result -cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, - Py_ssize_t *neg_ct, float64_t *compensation, - int64_t *num_consecutive_same_value, float64_t *prev_value) nogil: +cdef inline void add_mean( + float64_t val, + Py_ssize_t *nobs, + float64_t *sum_x, + Py_ssize_t *neg_ct, + float64_t *compensation, + int64_t *num_consecutive_same_value, + float64_t *prev_value +) nogil: """ add a value from the mean calc using Kahan summation """ cdef: float64_t y, t @@ -296,7 +304,9 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value) + output[i] = calc_mean( + minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value + ) if not is_monotonic_increasing_bounds: nobs = 0 @@ -309,8 +319,13 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, # Rolling variance -cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, - float64_t ssqdm_x, int64_t num_consecutive_same_value) nogil: +cdef inline float64_t calc_var( + int64_t minp, + int ddof, + float64_t nobs, + float64_t ssqdm_x, + int64_t num_consecutive_same_value +) nogil: cdef: float64_t result @@ -328,9 +343,15 @@ cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, return result -cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, - float64_t *ssqdm_x, float64_t *compensation, - int64_t *num_consecutive_same_value, float64_t *prev_value) nogil: +cdef inline void add_var( + float64_t val, + float64_t *nobs, + float64_t *mean_x, + float64_t *ssqdm_x, + float64_t *compensation, + int64_t *num_consecutive_same_value, + float64_t *prev_value, +) nogil: """ add a value from the var calc """ cdef: float64_t delta, prev_mean, y, t @@ -364,8 +385,13 @@ cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0]) -cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, - float64_t *ssqdm_x, float64_t *compensation) nogil: +cdef inline void remove_var( + float64_t val, + float64_t *nobs, + float64_t *mean_x, + float64_t *ssqdm_x, + float64_t *compensation +) nogil: """ remove a value from the var calc """ cdef: float64_t delta, prev_mean, y, t @@ -566,7 +592,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j - float64_t val, prev, min_val, mean_val, sum_val = 0 + float64_t val, min_val, mean_val, sum_val = 0 float64_t compensation_xxx_add, compensation_xxx_remove float64_t compensation_xx_add, compensation_xx_remove float64_t compensation_x_add, compensation_x_remove @@ -574,7 +600,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, float64_t prev_value int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0 int64_t s, e, num_consecutive_same_value - ndarray[float64_t] output, mean_array, values_copy + ndarray[float64_t] output, values_copy bint is_monotonic_increasing_bounds minp = max(minp, 3) @@ -779,7 +805,7 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j - float64_t val, prev, mean_val, min_val, sum_val = 0 + float64_t val, mean_val, min_val, sum_val = 0 float64_t compensation_xxxx_add, compensation_xxxx_remove float64_t compensation_xxx_remove, compensation_xxx_add float64_t compensation_xx_remove, compensation_xx_add @@ -853,7 +879,8 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, &compensation_xxx_add, &compensation_xxxx_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value) + output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, + num_consecutive_same_value) if not is_monotonic_increasing_bounds: nobs = 0 @@ -876,7 +903,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, bint err = False, is_monotonic_increasing_bounds int midpoint, ret = 0 int64_t nobs = 0, N = len(start), s, e, win - float64_t val, res, prev + float64_t val, res skiplist_t *sl ndarray[float64_t] output @@ -1149,7 +1176,7 @@ def roll_quantile(const float64_t[:] values, ndarray[int64_t] start, Py_ssize_t i, j, s, e, N = len(start), idx int ret = 0 int64_t nobs = 0, win - float64_t val, prev, midpoint, idx_with_fraction + float64_t val, idx_with_fraction float64_t vlow, vhigh skiplist_t *skiplist InterpolationType interpolation_type @@ -1275,7 +1302,7 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start, derived from roll_quantile """ cdef: - Py_ssize_t i, j, s, e, N = len(start), idx + Py_ssize_t i, j, s, e, N = len(start) float64_t rank_min = 0, rank = 0 int64_t nobs = 0, win float64_t val diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 3e4f116953cb3..89ac1c10254cb 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -283,7 +283,7 @@ class SettingWithCopyError(ValueError): The ``mode.chained_assignment`` needs to be set to set to 'raise.' This can happen unintentionally when chained indexing. - For more information on eveluation order, + For more information on evaluation order, see :ref:`the user guide`. For more information on view vs. copy, @@ -306,7 +306,7 @@ class SettingWithCopyWarning(Warning): 'Warn' is the default option. This can happen unintentionally when chained indexing. - For more information on eveluation order, + For more information on evaluation order, see :ref:`the user guide`. For more information on view vs. copy, diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 85fae6da07827..78289174b7e68 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -282,7 +282,7 @@ def copy_dev_clipboard(text): stacklevel=find_stack_level(), ) - with open("/dev/clipboard", "wt") as fd: + with open("/dev/clipboard", "w") as fd: fd.write(text) def paste_dev_clipboard() -> str: diff --git a/pandas/io/sas/byteswap.pyx b/pandas/io/sas/byteswap.pyx index 4620403910274..2a4d3f66a5d7d 100644 --- a/pandas/io/sas/byteswap.pyx +++ b/pandas/io/sas/byteswap.pyx @@ -1,5 +1,6 @@ """ -The following are faster versions of struct.unpack that avoid the overhead of Python function calls. +The following are faster versions of struct.unpack that avoid the overhead of Python +function calls. In the SAS7BDAT parser, they may be called up to (n_rows * n_cols) times. """ diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index b9897434666ef..8c13566c656b7 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -254,8 +254,16 @@ cdef: def _init_subheader_signatures(): - subheaders_32bit = [(sig, idx) for sig, idx in const.subheader_signature_to_index.items() if len(sig) == 4] - subheaders_64bit = [(sig, idx) for sig, idx in const.subheader_signature_to_index.items() if len(sig) == 8] + subheaders_32bit = [ + (sig, idx) + for sig, idx in const.subheader_signature_to_index.items() + if len(sig) == 4 + ] + subheaders_64bit = [ + (sig, idx) + for sig, idx in const.subheader_signature_to_index.items() + if len(sig) == 8 + ] assert len(subheaders_32bit) == 13 assert len(subheaders_64bit) == 17 assert len(const.subheader_signature_to_index) == 13 + 17 @@ -491,7 +499,8 @@ cdef class Parser: rpos = self.decompress(source, decompressed_source) if rpos != self.row_length: raise ValueError( - f"Expected decompressed line of length {self.row_length} bytes but decompressed {rpos} bytes" + f"Expected decompressed line of length {self.row_length} bytes " + f"but decompressed {rpos} bytes" ) source = decompressed_source