diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 3e1e5ed89d877..92374caa29b10 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -33,7 +33,7 @@ def _zip_axes_from_type(typ, new_axes): return axes -def _any_pandas_objects(terms): +def _any_pandas_objects(terms) -> bool: """Check a sequence of terms for instances of PandasObject.""" return any(isinstance(term.value, PandasObject) for term in terms) @@ -144,7 +144,8 @@ def _reconstruct_object(typ, obj, axes, dtype): obj : object The value to use in the type constructor axes : dict - The axes to use to construct the resulting pandas object + The axes to use to construct the resulting pandas object. + dtype : numpy dtype Returns ------- diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index dc6378e83d229..d6e6bd62a8985 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -22,7 +22,7 @@ def _check_ne_builtin_clash(expr): Parameters ---------- - terms : Term + expr : Term Terms can contain """ names = expr.names @@ -46,8 +46,9 @@ def __init__(self, expr): self.aligned_axes = None self.result_type = None - def convert(self): - """Convert an expression for evaluation. + def convert(self) -> str: + """ + Convert an expression for evaluation. Defaults to return the expression as a string. """ @@ -75,10 +76,9 @@ def evaluate(self): ) @property - def _is_aligned(self): + def _is_aligned(self) -> bool: return self.aligned_axes is not None and self.result_type is not None - @abc.abstractmethod def _evaluate(self): """ Return an evaluated expression. @@ -93,7 +93,11 @@ def _evaluate(self): ----- Must be implemented by subclasses. """ - pass + # mypy complains if we use @abc.abstractmethod, so we do use + # AbstractMethodError instead + from pandas.errors import AbstractMethodError + + raise AbstractMethodError(self) class NumExprEngine(AbstractEngine): @@ -101,10 +105,7 @@ class NumExprEngine(AbstractEngine): has_neg_frac = True - def __init__(self, expr): - super().__init__(expr) - - def convert(self): + def convert(self) -> str: return str(super().convert()) def _evaluate(self): @@ -137,9 +138,6 @@ class PythonEngine(AbstractEngine): has_neg_frac = False - def __init__(self, expr): - super().__init__(expr) - def evaluate(self): return self.expr() diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 461561a80a7e5..335ce16257faa 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -15,7 +15,7 @@ from pandas.io.formats.printing import pprint_thing -def _check_engine(engine): +def _check_engine(engine) -> str: """ Make sure a valid engine is passed. @@ -64,7 +64,7 @@ def _check_engine(engine): return engine -def _check_parser(parser): +def _check_parser(parser: str): """ Make sure a valid parser is passed. @@ -97,14 +97,13 @@ def _check_resolvers(resolvers): ) -def _check_expression(expr): +def _check_expression(expr: str): """ Make sure an expression is not an empty string Parameters ---------- - expr : object - An object that can be converted to a string + expr : str Raises ------ @@ -115,7 +114,7 @@ def _check_expression(expr): raise ValueError("expr cannot be an empty string") -def _convert_expression(expr): +def _convert_expression(expr) -> str: """ Convert an object to an expression. @@ -144,7 +143,7 @@ def _convert_expression(expr): return s -def _check_for_locals(expr, stack_level, parser): +def _check_for_locals(expr, stack_level: int, parser): from pandas.core.computation.expr import tokenize_string at_top_of_stack = stack_level == 0 @@ -168,15 +167,15 @@ def _check_for_locals(expr, stack_level, parser): def eval( expr, - parser="pandas", + parser: str = "pandas", engine=None, truediv=True, local_dict=None, global_dict=None, resolvers=(), - level=0, + level: int = 0, target=None, - inplace=False, + inplace: bool = False, ): """ Evaluate a Python expression as a string using various backends. @@ -192,7 +191,7 @@ def eval( Parameters ---------- - expr : str or unicode + expr : str The expression to evaluate. This string cannot contain any Python `statements `__, @@ -232,7 +231,7 @@ def eval( ``DataFrame.index`` and ``DataFrame.columns`` variables that refer to their respective :class:`~pandas.DataFrame` instance attributes. - level : int, optional + level : int, default 0 The number of prior stack frames to traverse and add to the current scope. Most users will **not** need to change this parameter. target : object, optional, default None diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 39653c3d695b2..cf9ed96dfbed7 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -7,7 +7,7 @@ import itertools as it import operator import tokenize -from typing import Type +from typing import Optional, Type import numpy as np @@ -40,7 +40,7 @@ import pandas.io.formats.printing as printing -def tokenize_string(source): +def tokenize_string(source: str): """ Tokenize a Python source code string. @@ -68,7 +68,8 @@ def tokenize_string(source): def _rewrite_assign(tok): - """Rewrite the assignment operator for PyTables expressions that use ``=`` + """ + Rewrite the assignment operator for PyTables expressions that use ``=`` as a substitute for ``==``. Parameters @@ -86,7 +87,8 @@ def _rewrite_assign(tok): def _replace_booleans(tok): - """Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise + """ + Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise precedence is changed to boolean precedence. Parameters @@ -110,7 +112,8 @@ def _replace_booleans(tok): def _replace_locals(tok): - """Replace local variables with a syntactically valid name. + """ + Replace local variables with a syntactically valid name. Parameters ---------- @@ -135,7 +138,8 @@ def _replace_locals(tok): def _clean_spaces_backtick_quoted_names(tok): - """Clean up a column name if surrounded by backticks. + """ + Clean up a column name if surrounded by backticks. Backtick quoted string are indicated by a certain tokval value. If a string is a backtick quoted token it will processed by @@ -303,7 +307,8 @@ def f(self, *args, **kwargs): def disallow(nodes): - """Decorator to disallow certain nodes from parsing. Raises a + """ + Decorator to disallow certain nodes from parsing. Raises a NotImplementedError instead. Returns @@ -324,7 +329,8 @@ def disallowed(cls): def _op_maker(op_class, op_symbol): - """Return a function to create an op class with its symbol already passed. + """ + Return a function to create an op class with its symbol already passed. Returns ------- @@ -332,8 +338,8 @@ def _op_maker(op_class, op_symbol): """ def f(self, node, *args, **kwargs): - """Return a partial function with an Op subclass with an operator - already passed. + """ + Return a partial function with an Op subclass with an operator already passed. Returns ------- @@ -813,18 +819,27 @@ class Expr: parser : str, optional, default 'pandas' env : Scope, optional, default None truediv : bool, optional, default True - level : int, optional, default 2 + level : int, optional, default 0 """ def __init__( - self, expr, engine="numexpr", parser="pandas", env=None, truediv=True, level=0 + self, + expr, + engine: str = "numexpr", + parser: str = "pandas", + env=None, + truediv: bool = True, + level: int = 0, ): self.expr = expr self.env = env or Scope(level=level + 1) self.engine = engine self.parser = parser self.env.scope["truediv"] = truediv - self._visitor = _parsers[parser](self.env, self.engine, self.parser) + self._visitor = _parsers[parser]( + self.env, self.engine, self.parser + ) # type: Optional[BaseExprVisitor] + assert isinstance(self._visitor, BaseExprVisitor), type(self._visitor) self.terms = self.parse() @property @@ -837,7 +852,7 @@ def __call__(self): def __repr__(self) -> str: return printing.pprint_thing(self.terms) - def __len__(self): + def __len__(self) -> int: return len(self.expr) def parse(self): diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 46bc762e1a0b3..d7c38af5539cb 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -157,7 +157,7 @@ def _where_numexpr(cond, a, b): set_use_numexpr(get_option("compute.use_numexpr")) -def _has_bool_dtype(x): +def _has_bool_dtype(x) -> bool: if isinstance(x, ABCDataFrame): return "bool" in x.dtypes try: diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index fe74b6994be7c..9f24b895b9f10 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -57,10 +57,10 @@ class UndefinedVariableError(NameError): def __init__(self, name, is_local): if is_local: - msg = "local variable {0!r} is not defined" + msg = "local variable {name!r} is not defined" else: - msg = "name {0!r} is not defined" - super().__init__(msg.format(name)) + msg = "name {name!r} is not defined" + super().__init__(msg.format(name=name)) class Term: @@ -79,7 +79,7 @@ def __init__(self, name, env, side=None, encoding=None): self.encoding = encoding @property - def local_name(self): + def local_name(self) -> str: return self.name.replace(_LOCAL_TAG, "") def __repr__(self) -> str: @@ -120,7 +120,7 @@ def update(self, value): self.value = value @property - def is_scalar(self): + def is_scalar(self) -> bool: return is_scalar(self._value) @property @@ -139,14 +139,14 @@ def type(self): return_type = type @property - def raw(self): + def raw(self) -> str: return pprint_thing( - "{0}(name={1!r}, type={2})" - "".format(self.__class__.__name__, self.name, self.type) + "{cls}(name={name!r}, type={typ})" + "".format(cls=self.__class__.__name__, name=self.name, typ=self.type) ) @property - def is_datetime(self): + def is_datetime(self) -> bool: try: t = self.type.type except AttributeError: @@ -167,7 +167,7 @@ def name(self): return self._name @property - def ndim(self): + def ndim(self) -> int: return self._value.ndim @@ -209,8 +209,8 @@ def __repr__(self) -> str: Print a generic n-ary operator and its operands using infix notation. """ # recurse over the operands - parened = ("({0})".format(pprint_thing(opr)) for opr in self.operands) - return pprint_thing(" {0} ".format(self.op).join(parened)) + parened = ("({opr})".format(opr=pprint_thing(opr)) for opr in self.operands) + return pprint_thing(" {op} ".format(op=self.op).join(parened)) @property def return_type(self): @@ -220,7 +220,7 @@ def return_type(self): return _result_type_many(*(term.type for term in com.flatten(self))) @property - def has_invalid_return_type(self): + def has_invalid_return_type(self) -> bool: types = self.operand_types obj_dtype_set = frozenset([np.dtype("object")]) return self.return_type == object and types - obj_dtype_set @@ -230,11 +230,11 @@ def operand_types(self): return frozenset(term.type for term in com.flatten(self)) @property - def is_scalar(self): + def is_scalar(self) -> bool: return all(operand.is_scalar for operand in self.operands) @property - def is_datetime(self): + def is_datetime(self) -> bool: try: t = self.return_type.type except AttributeError: @@ -339,7 +339,7 @@ def _cast_inplace(terms, acceptable_dtypes, dtype): term.update(new_value) -def is_term(obj): +def is_term(obj) -> bool: return isinstance(obj, Term) @@ -354,7 +354,7 @@ class BinOp(Op): right : Term or Op """ - def __init__(self, op, lhs, rhs, **kwargs): + def __init__(self, op: str, lhs, rhs, **kwargs): super().__init__(op, (lhs, rhs)) self.lhs = lhs self.rhs = rhs @@ -369,9 +369,10 @@ def __init__(self, op, lhs, rhs, **kwargs): # has to be made a list for python3 keys = list(_binary_ops_dict.keys()) raise ValueError( - "Invalid binary operator {0!r}, valid" - " operators are {1}".format(op, keys) + "Invalid binary operator {op!r}, valid" + " operators are {keys}".format(op=op, keys=keys) ) + assert not kwargs, kwargs def __call__(self, env): """ @@ -396,7 +397,7 @@ def __call__(self, env): return self.func(left, right) - def evaluate(self, env, engine, parser, term_type, eval_in_python): + def evaluate(self, env, engine: str, parser: str, term_type, eval_in_python): """ Evaluate a binary operation *before* being passed to the engine. @@ -446,6 +447,7 @@ def evaluate(self, env, engine, parser, term_type, eval_in_python): def convert_values(self): """Convert datetimes to a comparable value in an expression. """ + assert self.encoding is None, self.encoding def stringify(value): if self.encoding is not None: @@ -488,7 +490,7 @@ def _disallow_scalar_only_bool_ops(self): raise NotImplementedError("cannot evaluate scalar only bool ops") -def isnumeric(dtype): +def isnumeric(dtype) -> bool: return issubclass(np.dtype(dtype).type, np.number) @@ -549,8 +551,8 @@ def __init__(self, op, operand): self.func = _unary_ops_dict[op] except KeyError: raise ValueError( - "Invalid unary operator {0!r}, valid operators " - "are {1}".format(op, _unary_ops_syms) + "Invalid unary operator {op!r}, valid operators " + "are {syms}".format(op=op, syms=_unary_ops_syms) ) def __call__(self, env): @@ -558,7 +560,7 @@ def __call__(self, env): return self.func(operand) def __repr__(self) -> str: - return pprint_thing("{0}({1})".format(self.op, self.operand)) + return pprint_thing("{op}({operand})".format(op=self.op, operand=self.operand)) @property def return_type(self): @@ -583,12 +585,12 @@ def __call__(self, env): return self.func.func(*operands) def __repr__(self) -> str: - operands = map(str, self.operands) - return pprint_thing("{0}({1})".format(self.op, ",".join(operands))) + operands = ",".join(str(x) for x in self.operands) + return pprint_thing("{op}({operands})".format(op=self.op, operands=operands)) class FuncNode: - def __init__(self, name): + def __init__(self, name: str): from pandas.core.computation.check import _NUMEXPR_INSTALLED, _NUMEXPR_VERSION if name not in _mathops or ( @@ -596,7 +598,7 @@ def __init__(self, name): and _NUMEXPR_VERSION < LooseVersion("2.6.9") and name in ("floor", "ceil") ): - raise ValueError('"{0}" is not a supported function'.format(name)) + raise ValueError('"{name}" is not a supported function'.format(name=name)) self.name = name self.func = getattr(np, name) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 3a2ea30cbc8b9..f1be87477a2c8 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -2,6 +2,7 @@ import ast from functools import partial +from typing import Optional import numpy as np @@ -33,9 +34,6 @@ def __new__(cls, name, env, side=None, encoding=None): klass = Constant if not isinstance(name, str) else cls return object.__new__(klass) - def __init__(self, name, env, side=None, encoding=None): - super().__init__(name, env, side=side, encoding=encoding) - def _resolve_name(self): # must be a queryables if self.side == "left": @@ -56,9 +54,6 @@ def value(self): class Constant(Term): - def __init__(self, value, env, side=None, encoding=None): - super().__init__(value, env, side=side, encoding=encoding) - def _resolve_name(self): return self._name @@ -129,12 +124,12 @@ def conform(self, rhs): return rhs @property - def is_valid(self): + def is_valid(self) -> bool: """ return True if this is a valid field """ return self.lhs in self.queryables @property - def is_in_table(self): + def is_in_table(self) -> bool: """ return True if this is a valid column name for generation (e.g. an actual column in the table) """ return self.queryables.get(self.lhs) is not None @@ -200,7 +195,7 @@ def stringify(value): return TermValue(v, v, kind) elif kind == "bool": if isinstance(v, str): - v = not v.strip().lower() in [ + v = v.strip().lower() not in [ "false", "f", "no", @@ -253,6 +248,7 @@ def evaluate(self): rhs = self.conform(self.rhs) values = [TermValue(v, v, self.kind).value for v in rhs] + # TODO: Isnt TermValue(v, v, self.kind).value just `v`? if self.is_in_table: @@ -478,7 +474,8 @@ def _validate_where(w): class Expr(expr.Expr): - """ hold a pytables like expression, comprised of possibly multiple 'terms' + """ + Hold a pytables like expression, comprised of possibly multiple 'terms'. Parameters ---------- @@ -486,6 +483,7 @@ class Expr(expr.Expr): queryables : a "kinds" map (dict of column name -> kind), or None if column is non-indexable encoding : an encoding that will encode the query terms + scope_level : int, default 0 Returns ------- @@ -505,7 +503,7 @@ class Expr(expr.Expr): "major_axis>=20130101" """ - def __init__(self, where, queryables=None, encoding=None, scope_level=0): + def __init__(self, where, queryables=None, encoding=None, scope_level: int = 0): where = _validate_where(where) @@ -513,7 +511,7 @@ def __init__(self, where, queryables=None, encoding=None, scope_level=0): self.condition = None self.filter = None self.terms = None - self._visitor = None + self._visitor = None # type: Optional[ExprVisitor] # capture the environment if needed local_dict = DeepChainMap() @@ -523,13 +521,16 @@ def __init__(self, where, queryables=None, encoding=None, scope_level=0): where = where.expr elif isinstance(where, (list, tuple)): + # TODO: could disallow tuple arg? + where = list(where) for idx, w in enumerate(where): if isinstance(w, Expr): local_dict = w.env.scope else: w = _validate_where(w) where[idx] = w - where = " & ".join(map("({})".format, com.flatten(where))) # noqa + wheres = ["({x})".format(x=x) for x in com.flatten(where)] + where = " & ".join(wheres) self.expr = where self.env = Scope(scope_level + 1, local_dict=local_dict) @@ -574,7 +575,7 @@ def evaluate(self): class TermValue: """ hold a term value the we use to construct a condition/filter """ - def __init__(self, value, converted, kind): + def __init__(self, value, converted, kind: str): self.value = value self.converted = converted self.kind = kind @@ -593,7 +594,7 @@ def tostring(self, encoding): return self.converted -def maybe_expression(s): +def maybe_expression(s) -> bool: """ loose checking if s is a pytables-acceptable expression """ if not isinstance(s, str): return False diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 81c7b04bf3284..36710b774cc5a 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -9,6 +9,7 @@ import pprint import struct import sys +from typing import Mapping, Tuple import numpy as np @@ -17,7 +18,7 @@ def _ensure_scope( - level, global_dict=None, local_dict=None, resolvers=(), target=None, **kwargs + level: int, global_dict=None, local_dict=None, resolvers=(), target=None, **kwargs ): """Ensure that we are grabbing the correct scope.""" return Scope( @@ -29,9 +30,11 @@ def _ensure_scope( ) -def _replacer(x): - """Replace a number with its hexadecimal representation. Used to tag - temporary variables with their calling scope's id. +def _replacer(x) -> str: + """ + Replace a number with its hexadecimal representation. + + Used to tag temporary variables with their calling scope's id. """ # get the hex repr of the binary char and remove 0x and pad by pad_size # zeros @@ -44,7 +47,7 @@ def _replacer(x): return hex(hexin) -def _raw_hex_id(obj): +def _raw_hex_id(obj) -> str: """Return the padded hexadecimal id of ``obj``.""" # interpret as a pointer since that's what really what id returns packed = struct.pack("@P", id(obj)) @@ -63,7 +66,7 @@ def _raw_hex_id(obj): } -def _get_pretty_string(obj): +def _get_pretty_string(obj) -> str: """ Return a prettier version of obj. @@ -106,7 +109,12 @@ class Scope: __slots__ = ["level", "scope", "target", "resolvers", "temps"] def __init__( - self, level, global_dict=None, local_dict=None, resolvers=(), target=None + self, + level: int, + global_dict=None, + local_dict=None, + resolvers: Tuple = (), + target=None, ): self.level = level + 1 @@ -127,17 +135,21 @@ def __init__( # shallow copy here because we don't want to replace what's in # scope when we align terms (alignment accesses the underlying # numpy array of pandas objects) - self.scope = self.scope.new_child((global_dict or frame.f_globals).copy()) + self.scope = DeepChainMap( + self.scope.new_child((global_dict or frame.f_globals).copy()) + ) if not isinstance(local_dict, Scope): - self.scope = self.scope.new_child((local_dict or frame.f_locals).copy()) + self.scope = DeepChainMap( + self.scope.new_child((local_dict or frame.f_locals).copy()) + ) finally: del frame # assumes that resolvers are going from outermost scope to inner if isinstance(local_dict, Scope): resolvers += tuple(local_dict.resolvers.maps) - self.resolvers = DeepChainMap(*resolvers) - self.temps = {} + self.resolvers = DeepChainMap(*resolvers) # type: DeepChainMap + self.temps = {} # type: Mapping def __repr__(self) -> str: scope_keys = _get_pretty_string(list(self.scope.keys())) @@ -148,19 +160,20 @@ def __repr__(self) -> str: ) @property - def has_resolvers(self): - """Return whether we have any extra scope. + def has_resolvers(self) -> bool: + """ + Return whether we have any extra scope. For example, DataFrames pass Their columns as resolvers during calls to ``DataFrame.eval()`` and ``DataFrame.query()``. Returns ------- - hr : bool + bool """ return bool(len(self.resolvers)) - def resolve(self, key, is_local): + def resolve(self, key: str, is_local: bool): """ Resolve a variable name in a possibly local context. @@ -202,7 +215,7 @@ def resolve(self, key, is_local): raise UndefinedVariableError(key, is_local) - def swapkey(self, old_key, new_key, new_value=None): + def swapkey(self, old_key: str, new_key: str, new_value=None): """ Replace a variable name, with a potentially new value. @@ -223,6 +236,7 @@ def swapkey(self, old_key, new_key, new_value=None): maps.append(self.temps) for mapping in maps: + assert isinstance(mapping, (DeepChainMap, dict)), type(mapping) if old_key in mapping: mapping[new_key] = new_value return @@ -250,7 +264,7 @@ def _get_vars(self, stack, scopes): # scope after the loop del frame - def update(self, level): + def update(self, level: int): """ Update the current scope by going back `level` levels. @@ -270,7 +284,7 @@ def update(self, level): finally: del stack[:], stack - def add_tmp(self, value): + def add_tmp(self, value) -> str: """ Add a temporary variable to the scope. @@ -281,7 +295,7 @@ def add_tmp(self, value): Returns ------- - name : basestring + name : str The name of the temporary variable created. """ name = "{name}_{num}_{hex_id}".format( @@ -290,6 +304,7 @@ def add_tmp(self, value): # add to inner most scope assert name not in self.temps + assert isinstance(self.temps, dict) self.temps[name] = value assert name in self.temps @@ -297,12 +312,12 @@ def add_tmp(self, value): return name @property - def ntemps(self): + def ntemps(self) -> int: """The number of temporary variables in this scope""" return len(self.temps) @property - def full_scope(self): + def full_scope(self) -> DeepChainMap: """ Return the full scope for use with passing to engines transparently as a mapping.