CLN: assorted cleanups (pandas-dev#29232)

jbrockmendel · proost · commit a0cd35e021f5 · 2019-12-20T01:22:45.000+09:00
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -146,7 +146,8 @@ def group_cumprod_float64(float64_t[:, :] out,
                           int ngroups,
                           bint is_datetimelike,
                           bint skipna=True):
-    """Cumulative product of columns of `values`, in row groups `labels`.
+    """
+    Cumulative product of columns of `values`, in row groups `labels`.
 
     Parameters
     ----------
@@ -203,7 +204,8 @@ def group_cumsum(numeric[:, :] out,
                  int ngroups,
                  is_datetimelike,
                  bint skipna=True):
-    """Cumulative sum of columns of `values`, in row groups `labels`.
+    """
+    Cumulative sum of columns of `values`, in row groups `labels`.
 
     Parameters
     ----------
@@ -314,7 +316,8 @@ def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
 def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                          ndarray[uint8_t] mask, object direction,
                          int64_t limit):
-    """Indexes how to fill values forwards or backwards within a group
+    """
+    Indexes how to fill values forwards or backwards within a group.
 
     Parameters
     ----------
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -227,7 +227,7 @@ cdef class IndexEngine:
     cdef _get_index_values(self):
         return self.vgetter()
 
-    def _call_monotonic(self, values):
+    cdef _call_monotonic(self, values):
         return algos.is_monotonic(values, timelike=False)
 
     def get_backfill_indexer(self, other, limit=None):
@@ -236,7 +236,7 @@ cdef class IndexEngine:
     def get_pad_indexer(self, other, limit=None):
         return algos.pad(self._get_index_values(), other, limit=limit)
 
-    cdef _make_hash_table(self, n):
+    cdef _make_hash_table(self, Py_ssize_t n):
         raise NotImplementedError
 
     cdef _check_type(self, object val):
@@ -262,7 +262,7 @@ cdef class IndexEngine:
 
         self.need_unique_check = 0
 
-    cpdef _call_map_locations(self, values):
+    cdef void _call_map_locations(self, values):
         self.mapping.map_locations(values)
 
     def clear_mapping(self):
@@ -391,7 +391,7 @@ cdef class ObjectEngine(IndexEngine):
     """
     Index Engine for use with object-dtype Index, namely the base class Index
     """
-    cdef _make_hash_table(self, n):
+    cdef _make_hash_table(self, Py_ssize_t n):
         return _hash.PyObjectHashTable(n)
 
 
@@ -418,7 +418,7 @@ cdef class DatetimeEngine(Int64Engine):
     cdef _get_index_values(self):
         return self.vgetter().view('i8')
 
-    def _call_monotonic(self, values):
+    cdef _call_monotonic(self, values):
         return algos.is_monotonic(values, timelike=True)
 
     cpdef get_loc(self, object val):
@@ -500,11 +500,13 @@ cdef class PeriodEngine(Int64Engine):
     cdef _get_index_values(self):
         return super(PeriodEngine, self).vgetter()
 
-    cpdef _call_map_locations(self, values):
-        super(PeriodEngine, self)._call_map_locations(values.view('i8'))
+    cdef void _call_map_locations(self, values):
+        # super(...) pattern doesn't seem to work with `cdef`
+        Int64Engine._call_map_locations(self, values.view('i8'))
 
-    def _call_monotonic(self, values):
-        return super(PeriodEngine, self)._call_monotonic(values.view('i8'))
+    cdef _call_monotonic(self, values):
+        # super(...) pattern doesn't seem to work with `cdef`
+        return Int64Engine._call_monotonic(self, values.view('i8'))
 
     def get_indexer(self, values):
         cdef ndarray[int64_t, ndim=1] ordinals
diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in
@@ -29,7 +29,7 @@ dtypes = [('Float64', 'float64', 'float64_t', 'Float64', 'float64'),
 
 cdef class {{name}}Engine(IndexEngine):
 
-    cdef _make_hash_table(self, n):
+    cdef _make_hash_table(self, Py_ssize_t n):
         return _hash.{{hashtable_name}}HashTable(n)
 
     {{if name not in {'Float64', 'Float32'} }}
@@ -38,7 +38,7 @@ cdef class {{name}}Engine(IndexEngine):
             raise KeyError(val)
     {{endif}}
 
-    cpdef _call_map_locations(self, values):
+    cdef void _call_map_locations(self, values):
         # self.mapping is of type {{hashtable_name}}HashTable,
         # so convert dtype of values
         self.mapping.map_locations(algos.ensure_{{hashtable_dtype}}(values))
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
@@ -319,7 +319,7 @@ cdef slice_getitem(slice slc, ind):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cpdef slice indexer_as_slice(int64_t[:] vals):
+cdef slice indexer_as_slice(int64_t[:] vals):
     cdef:
         Py_ssize_t i, n, start, stop
         int64_t d
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -2168,8 +2168,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask,
-                   bint convert=1):
+def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=1):
     """
     Substitute for np.vectorize with pandas-friendly dtype inference
 
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
@@ -105,7 +105,7 @@ cdef class Reducer:
             flatiter it
             bint has_labels, has_ndarray_labels
             object res, name, labels, index
-            object cached_typ=None
+            object cached_typ = None
 
         arr = self.arr
         chunk = self.dummy
@@ -248,7 +248,7 @@ cdef class SeriesBinGrouper:
             object res
             bint initialized = 0
             Slider vslider, islider
-            object name, cached_typ=None, cached_ityp=None
+            object name, cached_typ = None, cached_ityp = None
 
         counts = np.zeros(self.ngroups, dtype=np.int64)
 
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
@@ -581,7 +581,7 @@ def try_parse_dates(object[:] values, parser=None,
                 else:
                     result[i] = parse_date(values[i])
         except Exception:
-            # Since parser is user-defined, we can't guess what it migh raise
+            # Since parser is user-defined, we can't guess what it might raise
             return values
     else:
         parse_date = parser
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -260,9 +260,9 @@ def _get_data_algo(values):
         if lib.infer_dtype(values, skipna=False) in ["string"]:
             ndtype = "string"
 
-    f = _hashtables.get(ndtype, _hashtables["object"])
+    htable = _hashtables.get(ndtype, _hashtables["object"])
 
-    return f, values
+    return htable, values
 
 
 # --------------- #
diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py
@@ -29,7 +29,7 @@ def _check_ne_builtin_clash(expr):
     overlap = names & _ne_builtins
 
     if overlap:
-        s = ", ".join(map(repr, overlap))
+        s = ", ".join(repr(x) for x in overlap)
         raise NumExprClobberingError(
             'Variables in expression "{expr}" '
             "overlap with builtins: ({s})".format(expr=expr, s=s)
diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py
@@ -4,7 +4,7 @@
 from datetime import datetime
 from distutils.version import LooseVersion
 from functools import partial
-import operator as op
+import operator
 
 import numpy as np
 
@@ -18,7 +18,7 @@
 
 from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded
 
-_reductions = "sum", "prod"
+_reductions = ("sum", "prod")
 
 _unary_math_ops = (
     "sin",
@@ -273,20 +273,37 @@ def _not_in(x, y):
         return x not in y
 
 
-_cmp_ops_syms = ">", "<", ">=", "<=", "==", "!=", "in", "not in"
-_cmp_ops_funcs = op.gt, op.lt, op.ge, op.le, op.eq, op.ne, _in, _not_in
+_cmp_ops_syms = (">", "<", ">=", "<=", "==", "!=", "in", "not in")
+_cmp_ops_funcs = (
+    operator.gt,
+    operator.lt,
+    operator.ge,
+    operator.le,
+    operator.eq,
+    operator.ne,
+    _in,
+    _not_in,
+)
 _cmp_ops_dict = dict(zip(_cmp_ops_syms, _cmp_ops_funcs))
 
-_bool_ops_syms = "&", "|", "and", "or"
-_bool_ops_funcs = op.and_, op.or_, op.and_, op.or_
+_bool_ops_syms = ("&", "|", "and", "or")
+_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_)
 _bool_ops_dict = dict(zip(_bool_ops_syms, _bool_ops_funcs))
 
-_arith_ops_syms = "+", "-", "*", "/", "**", "//", "%"
-_arith_ops_funcs = (op.add, op.sub, op.mul, op.truediv, op.pow, op.floordiv, op.mod)
+_arith_ops_syms = ("+", "-", "*", "/", "**", "//", "%")
+_arith_ops_funcs = (
+    operator.add,
+    operator.sub,
+    operator.mul,
+    operator.truediv,
+    operator.pow,
+    operator.floordiv,
+    operator.mod,
+)
 _arith_ops_dict = dict(zip(_arith_ops_syms, _arith_ops_funcs))
 
-_special_case_arith_ops_syms = "**", "//", "%"
-_special_case_arith_ops_funcs = op.pow, op.floordiv, op.mod
+_special_case_arith_ops_syms = ("**", "//", "%")
+_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod)
 _special_case_arith_ops_dict = dict(
     zip(_special_case_arith_ops_syms, _special_case_arith_ops_funcs)
 )
@@ -371,7 +388,7 @@ def __call__(self, env):
         """
         # handle truediv
         if self.op == "/" and env.scope["truediv"]:
-            self.func = op.truediv
+            self.func = operator.truediv
 
         # recurse over the left/right nodes
         left = self.lhs(env)
@@ -502,8 +519,8 @@ def __init__(self, lhs, rhs, truediv, *args, **kwargs):
         _cast_inplace(com.flatten(self), acceptable_dtypes, np.float_)
 
 
-_unary_ops_syms = "+", "-", "~", "not"
-_unary_ops_funcs = op.pos, op.neg, op.invert, op.invert
+_unary_ops_syms = ("+", "-", "~", "not")
+_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert)
 _unary_ops_dict = dict(zip(_unary_ops_syms, _unary_ops_funcs))
 
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -432,7 +432,7 @@ def apply(
                 b_items = self.items[b.mgr_locs.indexer]
 
                 for k, obj in aligned_args.items():
-                    axis = getattr(obj, "_info_axis_number", 0)
+                    axis = obj._info_axis_number
                     kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
 
             applied = getattr(b, f)(**kwargs)
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -237,7 +237,8 @@ def _get_values(
     fill_value_typ: Optional[str] = None,
     mask: Optional[np.ndarray] = None,
 ) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]:
-    """ Utility to get the values view, mask, dtype, dtype_max, and fill_value.
+    """
+    Utility to get the values view, mask, dtype, dtype_max, and fill_value.
 
     If both mask and fill_value/fill_value_typ are not None and skipna is True,
     the values array will be copied.
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
@@ -989,13 +989,12 @@ def test_query_with_nested_special_character(self, parser, engine):
         assert_frame_equal(res, expec)
 
     def test_query_lex_compare_strings(self, parser, engine):
-        import operator as opr
 
         a = Series(np.random.choice(list("abcde"), 20))
         b = Series(np.arange(a.size))
         df = DataFrame({"X": a, "Y": b})
 
-        ops = {"<": opr.lt, ">": opr.gt, "<=": opr.le, ">=": opr.ge}
+        ops = {"<": operator.lt, ">": operator.gt, "<=": operator.le, ">=": operator.ge}
 
         for op, func in ops.items():
             res = df.query('X %s "d"' % op, engine=engine, parser=parser)
diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py
@@ -1,7 +1,7 @@
 """ test partial slicing on Series/Frame """
 
 from datetime import datetime
-import operator as op
+import operator
 
 import numpy as np
 import pytest
@@ -408,10 +408,10 @@ def test_loc_datetime_length_one(self):
     @pytest.mark.parametrize(
         "op,expected",
         [
-            (op.lt, [True, False, False, False]),
-            (op.le, [True, True, False, False]),
-            (op.eq, [False, True, False, False]),
-            (op.gt, [False, False, False, True]),
+            (operator.lt, [True, False, False, False]),
+            (operator.le, [True, True, False, False]),
+            (operator.eq, [False, True, False, False]),
+            (operator.gt, [False, False, False, True]),
         ],
     )
     def test_selection_by_datetimelike(self, datetimelike, op, expected):