diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index b2ffbb3ecb4f2..6efe5e90f522f 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -146,7 +146,8 @@ def group_cumprod_float64(float64_t[:, :] out, int ngroups, bint is_datetimelike, bint skipna=True): - """Cumulative product of columns of `values`, in row groups `labels`. + """ + Cumulative product of columns of `values`, in row groups `labels`. Parameters ---------- @@ -203,7 +204,8 @@ def group_cumsum(numeric[:, :] out, int ngroups, is_datetimelike, bint skipna=True): - """Cumulative sum of columns of `values`, in row groups `labels`. + """ + Cumulative sum of columns of `values`, in row groups `labels`. Parameters ---------- @@ -314,7 +316,8 @@ def group_shift_indexer(int64_t[:] out, const int64_t[:] labels, def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, ndarray[uint8_t] mask, object direction, int64_t limit): - """Indexes how to fill values forwards or backwards within a group + """ + Indexes how to fill values forwards or backwards within a group. Parameters ---------- diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index d9bde2a471e06..cc114b48a5b53 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -227,7 +227,7 @@ cdef class IndexEngine: cdef _get_index_values(self): return self.vgetter() - def _call_monotonic(self, values): + cdef _call_monotonic(self, values): return algos.is_monotonic(values, timelike=False) def get_backfill_indexer(self, other, limit=None): @@ -236,7 +236,7 @@ cdef class IndexEngine: def get_pad_indexer(self, other, limit=None): return algos.pad(self._get_index_values(), other, limit=limit) - cdef _make_hash_table(self, n): + cdef _make_hash_table(self, Py_ssize_t n): raise NotImplementedError cdef _check_type(self, object val): @@ -262,7 +262,7 @@ cdef class IndexEngine: self.need_unique_check = 0 - cpdef _call_map_locations(self, values): + cdef void _call_map_locations(self, values): self.mapping.map_locations(values) def clear_mapping(self): @@ -391,7 +391,7 @@ cdef class ObjectEngine(IndexEngine): """ Index Engine for use with object-dtype Index, namely the base class Index """ - cdef _make_hash_table(self, n): + cdef _make_hash_table(self, Py_ssize_t n): return _hash.PyObjectHashTable(n) @@ -418,7 +418,7 @@ cdef class DatetimeEngine(Int64Engine): cdef _get_index_values(self): return self.vgetter().view('i8') - def _call_monotonic(self, values): + cdef _call_monotonic(self, values): return algos.is_monotonic(values, timelike=True) cpdef get_loc(self, object val): @@ -500,11 +500,13 @@ cdef class PeriodEngine(Int64Engine): cdef _get_index_values(self): return super(PeriodEngine, self).vgetter() - cpdef _call_map_locations(self, values): - super(PeriodEngine, self)._call_map_locations(values.view('i8')) + cdef void _call_map_locations(self, values): + # super(...) pattern doesn't seem to work with `cdef` + Int64Engine._call_map_locations(self, values.view('i8')) - def _call_monotonic(self, values): - return super(PeriodEngine, self)._call_monotonic(values.view('i8')) + cdef _call_monotonic(self, values): + # super(...) pattern doesn't seem to work with `cdef` + return Int64Engine._call_monotonic(self, values.view('i8')) def get_indexer(self, values): cdef ndarray[int64_t, ndim=1] ordinals diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index 4db048eeb0383..093cca4fe7ed5 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -29,7 +29,7 @@ dtypes = [('Float64', 'float64', 'float64_t', 'Float64', 'float64'), cdef class {{name}}Engine(IndexEngine): - cdef _make_hash_table(self, n): + cdef _make_hash_table(self, Py_ssize_t n): return _hash.{{hashtable_name}}HashTable(n) {{if name not in {'Float64', 'Float32'} }} @@ -38,7 +38,7 @@ cdef class {{name}}Engine(IndexEngine): raise KeyError(val) {{endif}} - cpdef _call_map_locations(self, values): + cdef void _call_map_locations(self, values): # self.mapping is of type {{hashtable_name}}HashTable, # so convert dtype of values self.mapping.map_locations(algos.ensure_{{hashtable_dtype}}(values)) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 48190d123f4a9..ff143fea892ae 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -319,7 +319,7 @@ cdef slice_getitem(slice slc, ind): @cython.boundscheck(False) @cython.wraparound(False) -cpdef slice indexer_as_slice(int64_t[:] vals): +cdef slice indexer_as_slice(int64_t[:] vals): cdef: Py_ssize_t i, n, start, stop int64_t d diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7fc4fede1996b..eddc0beae7b8b 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2168,8 +2168,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, @cython.boundscheck(False) @cython.wraparound(False) -def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, - bint convert=1): +def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=1): """ Substitute for np.vectorize with pandas-friendly dtype inference diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 7ed131e1c7608..f505c0479e944 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -105,7 +105,7 @@ cdef class Reducer: flatiter it bint has_labels, has_ndarray_labels object res, name, labels, index - object cached_typ=None + object cached_typ = None arr = self.arr chunk = self.dummy @@ -248,7 +248,7 @@ cdef class SeriesBinGrouper: object res bint initialized = 0 Slider vslider, islider - object name, cached_typ=None, cached_ityp=None + object name, cached_typ = None, cached_ityp = None counts = np.zeros(self.ngroups, dtype=np.int64) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 796d1400194fd..27abedcda5adf 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -581,7 +581,7 @@ def try_parse_dates(object[:] values, parser=None, else: result[i] = parse_date(values[i]) except Exception: - # Since parser is user-defined, we can't guess what it migh raise + # Since parser is user-defined, we can't guess what it might raise return values else: parse_date = parser diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 5139fdfeeb916..ca36546b8937a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -260,9 +260,9 @@ def _get_data_algo(values): if lib.infer_dtype(values, skipna=False) in ["string"]: ndtype = "string" - f = _hashtables.get(ndtype, _hashtables["object"]) + htable = _hashtables.get(ndtype, _hashtables["object"]) - return f, values + return htable, values # --------------- # diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index 3cc34ea1f4ed7..dc6378e83d229 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -29,7 +29,7 @@ def _check_ne_builtin_clash(expr): overlap = names & _ne_builtins if overlap: - s = ", ".join(map(repr, overlap)) + s = ", ".join(repr(x) for x in overlap) raise NumExprClobberingError( 'Variables in expression "{expr}" ' "overlap with builtins: ({s})".format(expr=expr, s=s) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 28b6aef693bfe..dc0f381414970 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -4,7 +4,7 @@ from datetime import datetime from distutils.version import LooseVersion from functools import partial -import operator as op +import operator import numpy as np @@ -18,7 +18,7 @@ from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded -_reductions = "sum", "prod" +_reductions = ("sum", "prod") _unary_math_ops = ( "sin", @@ -273,20 +273,37 @@ def _not_in(x, y): return x not in y -_cmp_ops_syms = ">", "<", ">=", "<=", "==", "!=", "in", "not in" -_cmp_ops_funcs = op.gt, op.lt, op.ge, op.le, op.eq, op.ne, _in, _not_in +_cmp_ops_syms = (">", "<", ">=", "<=", "==", "!=", "in", "not in") +_cmp_ops_funcs = ( + operator.gt, + operator.lt, + operator.ge, + operator.le, + operator.eq, + operator.ne, + _in, + _not_in, +) _cmp_ops_dict = dict(zip(_cmp_ops_syms, _cmp_ops_funcs)) -_bool_ops_syms = "&", "|", "and", "or" -_bool_ops_funcs = op.and_, op.or_, op.and_, op.or_ +_bool_ops_syms = ("&", "|", "and", "or") +_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_) _bool_ops_dict = dict(zip(_bool_ops_syms, _bool_ops_funcs)) -_arith_ops_syms = "+", "-", "*", "/", "**", "//", "%" -_arith_ops_funcs = (op.add, op.sub, op.mul, op.truediv, op.pow, op.floordiv, op.mod) +_arith_ops_syms = ("+", "-", "*", "/", "**", "//", "%") +_arith_ops_funcs = ( + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.pow, + operator.floordiv, + operator.mod, +) _arith_ops_dict = dict(zip(_arith_ops_syms, _arith_ops_funcs)) -_special_case_arith_ops_syms = "**", "//", "%" -_special_case_arith_ops_funcs = op.pow, op.floordiv, op.mod +_special_case_arith_ops_syms = ("**", "//", "%") +_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod) _special_case_arith_ops_dict = dict( zip(_special_case_arith_ops_syms, _special_case_arith_ops_funcs) ) @@ -371,7 +388,7 @@ def __call__(self, env): """ # handle truediv if self.op == "/" and env.scope["truediv"]: - self.func = op.truediv + self.func = operator.truediv # recurse over the left/right nodes left = self.lhs(env) @@ -502,8 +519,8 @@ def __init__(self, lhs, rhs, truediv, *args, **kwargs): _cast_inplace(com.flatten(self), acceptable_dtypes, np.float_) -_unary_ops_syms = "+", "-", "~", "not" -_unary_ops_funcs = op.pos, op.neg, op.invert, op.invert +_unary_ops_syms = ("+", "-", "~", "not") +_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert) _unary_ops_dict = dict(zip(_unary_ops_syms, _unary_ops_funcs)) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d8b4e4127acd1..c47aaf7c773c4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -432,7 +432,7 @@ def apply( b_items = self.items[b.mgr_locs.indexer] for k, obj in aligned_args.items(): - axis = getattr(obj, "_info_axis_number", 0) + axis = obj._info_axis_number kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy) applied = getattr(b, f)(**kwargs) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 5dd4cc946572c..b9267db76e1a8 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -237,7 +237,8 @@ def _get_values( fill_value_typ: Optional[str] = None, mask: Optional[np.ndarray] = None, ) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]: - """ Utility to get the values view, mask, dtype, dtype_max, and fill_value. + """ + Utility to get the values view, mask, dtype, dtype_max, and fill_value. If both mask and fill_value/fill_value_typ are not None and skipna is True, the values array will be copied. diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 13b994d116c76..bcbd13cdcf017 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -989,13 +989,12 @@ def test_query_with_nested_special_character(self, parser, engine): assert_frame_equal(res, expec) def test_query_lex_compare_strings(self, parser, engine): - import operator as opr a = Series(np.random.choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) - ops = {"<": opr.lt, ">": opr.gt, "<=": opr.le, ">=": opr.ge} + ops = {"<": operator.lt, ">": operator.gt, "<=": operator.le, ">=": operator.ge} for op, func in ops.items(): res = df.query('X %s "d"' % op, engine=engine, parser=parser) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 5660fa5ffed80..32e2a72fcef36 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -1,7 +1,7 @@ """ test partial slicing on Series/Frame """ from datetime import datetime -import operator as op +import operator import numpy as np import pytest @@ -408,10 +408,10 @@ def test_loc_datetime_length_one(self): @pytest.mark.parametrize( "op,expected", [ - (op.lt, [True, False, False, False]), - (op.le, [True, True, False, False]), - (op.eq, [False, True, False, False]), - (op.gt, [False, False, False, True]), + (operator.lt, [True, False, False, False]), + (operator.le, [True, True, False, False]), + (operator.eq, [False, True, False, False]), + (operator.gt, [False, False, False, True]), ], ) def test_selection_by_datetimelike(self, datetimelike, op, expected):