diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index a9237c239701b..f904781178656 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -32,7 +32,7 @@ check each issue individually, and it's not possible to find the unassigned ones For this reason, we implemented a workaround consisting of adding a comment with the exact text `take`. When you do it, a GitHub action will automatically assign you the issue -(this will take seconds, and may require refreshint the page to see it). +(this will take seconds, and may require refreshing the page to see it). By doing this, it's possible to filter the list of issues and find only the unassigned ones. So, a good way to find an issue to start contributing to pandas is to check the list of diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 920919755dc23..54175fada6e56 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -54,7 +54,7 @@ Other API changes - :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last`` will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`) -- +- :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -148,7 +148,7 @@ Indexing ^^^^^^^^ - Bug in slicing on a :class:`DatetimeIndex` with a partial-timestamp dropping high-resolution indices near the end of a year, quarter, or month (:issue:`31064`) - Bug in :meth:`PeriodIndex.get_loc` treating higher-resolution strings differently from :meth:`PeriodIndex.get_value` (:issue:`31172`) -- +- Bug in :meth:`Series.at` and :meth:`DataFrame.at` not matching ``.loc`` behavior when looking up an integer in a :class:`Float64Index` (:issue:`31329`) Missing ^^^^^^^ diff --git a/pandas/_typing.py b/pandas/_typing.py index 171b76b4d2c4b..e2858441605f7 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -45,6 +45,7 @@ # other Dtype = Union[str, np.dtype, "ExtensionDtype"] +DtypeObj = Union[np.dtype, "ExtensionDtype"] FilePathOrBuffer = Union[str, Path, IO[AnyStr]] # FrameOrSeriesUnion means either a DataFrame or a Series. E.g. diff --git a/pandas/core/base.py b/pandas/core/base.py index 05e3302abddbe..9fe1af776dd2b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1,6 +1,7 @@ """ Base and utility classes for pandas objects. """ + import builtins import textwrap from typing import Dict, FrozenSet, List, Optional, Union @@ -45,11 +46,15 @@ class PandasObject(DirNamesMixin): - """baseclass for various pandas objects""" + """ + Baseclass for various pandas objects. + """ @property def _constructor(self): - """class constructor (for this class it's just `__class__`""" + """ + Class constructor (for this class it's just `__class__`. + """ return type(self) def __repr__(self) -> str: @@ -77,16 +82,14 @@ def __sizeof__(self): """ if hasattr(self, "memory_usage"): mem = self.memory_usage(deep=True) - if not is_scalar(mem): - mem = mem.sum() - return int(mem) + return int(mem if is_scalar(mem) else mem.sum()) - # no memory_usage attribute, so fall back to - # object's 'sizeof' + # no memory_usage attribute, so fall back to object's 'sizeof' return super().__sizeof__() def _ensure_type(self: T, obj) -> T: - """Ensure that an object has same type as self. 
+ """ + Ensure that an object has same type as self. Used by type checkers. """ @@ -95,7 +98,8 @@ def _ensure_type(self: T, obj) -> T: class NoNewAttributesMixin: - """Mixin which prevents adding new attributes. + """ + Mixin which prevents adding new attributes. Prevents additional attributes via xxx.attribute = "something" after a call to `self.__freeze()`. Mainly used to prevent the user from using @@ -106,7 +110,9 @@ class NoNewAttributesMixin: """ def _freeze(self): - """Prevents setting additional attributes""" + """ + Prevents setting additional attributes. + """ object.__setattr__(self, "__frozen", True) # prevent adding any attribute via s.xxx.new_attribute = ... @@ -180,14 +186,12 @@ class SelectionMixin: @property def _selection_name(self): """ - return a name for myself; this would ideally be called - the 'name' property, but we cannot conflict with the - Series.name property which can be set + Return a name for myself; + + This would ideally be called the 'name' property, + but we cannot conflict with the Series.name property which can be set. """ - if self._selection is None: - return None # 'result' - else: - return self._selection + return self._selection @property def _selection_list(self): @@ -199,7 +203,6 @@ def _selection_list(self): @cache_readonly def _selected_obj(self): - if self._selection is None or isinstance(self.obj, ABCSeries): return self.obj else: @@ -246,12 +249,11 @@ def _gotitem(self, key, ndim: int, subset=None): Parameters ---------- - key : string / list of selections + key : str / list of selections ndim : 1,2 requested ndim of result subset : object, default None subset to act on - """ raise AbstractMethodError(self) @@ -266,7 +268,6 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs): - try to find a function (or attribute) on ourselves - try to find a numpy function - raise - """ assert isinstance(arg, str) @@ -585,7 +586,6 @@ def _shallow_copy(self, obj, **kwargs): """ return a new object with the replacement attributes """ - if isinstance(obj, self._constructor): obj = obj.obj for attr in self._attributes: @@ -669,8 +669,7 @@ def item(self): if len(self) == 1: return next(iter(self)) - else: - raise ValueError("can only convert an array of size 1 to a Python scalar") + raise ValueError("can only convert an array of size 1 to a Python scalar") @property def nbytes(self) -> int: @@ -735,7 +734,6 @@ def array(self) -> ExtensionArray: Examples -------- - For regular NumPy types like int, and float, a PandasArray is returned. @@ -851,12 +849,11 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs): """ if is_extension_array_dtype(self.dtype): return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs) - else: - if kwargs: - msg = "to_numpy() got an unexpected keyword argument '{}'".format( - list(kwargs.keys())[0] - ) - raise TypeError(msg) + elif kwargs: + bad_keys = list(kwargs.keys())[0] + raise TypeError( + f"to_numpy() got an unexpected keyword argument '{bad_keys}'" + ) result = np.asarray(self._values, dtype=dtype) # TODO(GH-24345): Avoid potential double copy @@ -1076,7 +1073,9 @@ def _reduce( filter_type=None, **kwds, ): - """ perform the reduction type operation if we can """ + """ + Perform the reduction type operation if we can. + """ func = getattr(self, name, None) if func is None: raise TypeError( @@ -1103,9 +1102,7 @@ def _map_values(self, mapper, na_action=None): The output of the mapping function applied to the index. 
If the function returns a tuple with more than one element a MultiIndex will be returned. - """ - # we can fastpath dict/Series to an efficient map # as we know that we are not going to have to yield # python types @@ -1341,7 +1338,9 @@ def is_monotonic(self) -> bool: @property def is_monotonic_increasing(self) -> bool: - """alias for is_monotonic""" + """ + Alias for is_monotonic. + """ # mypy complains if we alias directly return self.is_monotonic @@ -1455,7 +1454,6 @@ def factorize(self, sort=False, na_sentinel=-1): Examples -------- - >>> x = pd.Series([1, 2, 3]) >>> x 0 1 diff --git a/pandas/core/common.py b/pandas/core/common.py index 8c52999c4a79e..a76119da2707a 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -72,16 +72,6 @@ def consensus_name_attr(objs): return name -def maybe_box(indexer, values, obj, key): - - # if we have multiples coming back, box em - if isinstance(values, np.ndarray): - return obj[indexer.get_loc(key)] - - # return the value - return values - - def maybe_box_datetimelike(value): # turn a datetime like into a Timestamp/timedelta as needed diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index d91586e6c9b81..c26208d3b4465 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -1,11 +1,12 @@ -""":func:`~pandas.eval` parsers +""" +:func:`~pandas.eval` parsers. """ import ast from functools import partial, reduce from keyword import iskeyword import tokenize -from typing import Optional, Type +from typing import Callable, Optional, Set, Tuple, Type, TypeVar import numpy as np @@ -34,8 +35,9 @@ import pandas.io.formats.printing as printing -def _rewrite_assign(tok): - """Rewrite the assignment operator for PyTables expressions that use ``=`` +def _rewrite_assign(tok: Tuple[int, str]) -> Tuple[int, str]: + """ + Rewrite the assignment operator for PyTables expressions that use ``=`` as a substitute for ``==``. Parameters @@ -45,15 +47,16 @@ def _rewrite_assign(tok): Returns ------- - t : tuple of int, str + tuple of int, str Either the input or token or the replacement values """ toknum, tokval = tok return toknum, "==" if tokval == "=" else tokval -def _replace_booleans(tok): - """Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise +def _replace_booleans(tok: Tuple[int, str]) -> Tuple[int, str]: + """ + Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise precedence is changed to boolean precedence. Parameters @@ -63,7 +66,7 @@ def _replace_booleans(tok): Returns ------- - t : tuple of int, str + tuple of int, str Either the input or token or the replacement values """ toknum, tokval = tok @@ -76,8 +79,9 @@ def _replace_booleans(tok): return toknum, tokval -def _replace_locals(tok): - """Replace local variables with a syntactically valid name. +def _replace_locals(tok: Tuple[int, str]) -> Tuple[int, str]: + """ + Replace local variables with a syntactically valid name. Parameters ---------- @@ -86,7 +90,7 @@ def _replace_locals(tok): Returns ------- - t : tuple of int, str + tuple of int, str Either the input or token or the replacement values Notes @@ -102,12 +106,16 @@ def _replace_locals(tok): def _compose2(f, g): - """Compose 2 callables""" + """ + Compose 2 callables. + """ return lambda *args, **kwargs: f(g(*args, **kwargs)) def _compose(*funcs): - """Compose 2 or more callables""" + """ + Compose 2 or more callables. 
+ """ assert len(funcs) > 1, "At least 2 callables must be passed to compose" return reduce(_compose2, funcs) @@ -117,8 +125,9 @@ def _preparse( f=_compose( _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks ), -): - """Compose a collection of tokenization functions +) -> str: + """ + Compose a collection of tokenization functions. Parameters ---------- @@ -132,7 +141,7 @@ def _preparse( Returns ------- - s : str + str Valid Python source code Notes @@ -146,7 +155,9 @@ def _preparse( def _is_type(t): - """Factory for a type checking function of type ``t`` or tuple of types.""" + """ + Factory for a type checking function of type ``t`` or tuple of types. + """ return lambda x: isinstance(x.value, t) @@ -164,7 +175,9 @@ def _is_type(t): def _filter_nodes(superclass, all_nodes=_all_nodes): - """Filter out AST nodes that are subclasses of ``superclass``.""" + """ + Filter out AST nodes that are subclasses of ``superclass``. + """ node_names = (node.__name__ for node in all_nodes if issubclass(node, superclass)) return frozenset(node_names) @@ -227,30 +240,35 @@ def _filter_nodes(superclass, all_nodes=_all_nodes): assert not intersection, _msg -def _node_not_implemented(node_name, cls): - """Return a function that raises a NotImplementedError with a passed node - name. +# TODO: Python 3.6.2: replace Callable[..., None] with Callable[..., NoReturn] +def _node_not_implemented(node_name: str) -> Callable[..., None]: + """ + Return a function that raises a NotImplementedError with a passed node name. """ def f(self, *args, **kwargs): - raise NotImplementedError(f"{repr(node_name)} nodes are not implemented") + raise NotImplementedError(f"'{node_name}' nodes are not implemented") return f -def disallow(nodes): - """Decorator to disallow certain nodes from parsing. Raises a +_T = TypeVar("_T", bound="BaseExprVisitor") + + +def disallow(nodes: Set[str]) -> Callable[[Type[_T]], Type[_T]]: + """ + Decorator to disallow certain nodes from parsing. Raises a NotImplementedError instead. Returns ------- - disallowed : callable + callable """ - def disallowed(cls): + def disallowed(cls: Type[_T]) -> Type[_T]: cls.unsupported_nodes = () for node in nodes: - new_method = _node_not_implemented(node, cls) + new_method = _node_not_implemented(node) name = f"visit_{node}" cls.unsupported_nodes += (name,) setattr(cls, name, new_method) @@ -260,20 +278,21 @@ def disallowed(cls): def _op_maker(op_class, op_symbol): - """Return a function to create an op class with its symbol already passed. + """ + Return a function to create an op class with its symbol already passed. Returns ------- - f : callable + callable """ def f(self, node, *args, **kwargs): - """Return a partial function with an Op subclass with an operator - already passed. + """ + Return a partial function with an Op subclass with an operator already passed. Returns ------- - f : callable + callable """ return partial(op_class, op_symbol, *args, **kwargs) @@ -284,7 +303,9 @@ def f(self, node, *args, **kwargs): def add_ops(op_classes): - """Decorator to add default implementation of ops.""" + """ + Decorator to add default implementation of ops. + """ def f(cls): for op_attr_name, op_class in op_classes.items(): @@ -353,6 +374,8 @@ class BaseExprVisitor(ast.NodeVisitor): ast.NotIn: ast.NotIn, } + unsupported_nodes: Tuple[str, ...] 
+ def __init__(self, env, engine, parser, preparser=_preparse): self.env = env self.engine = engine @@ -647,7 +670,7 @@ def visit_Call(self, node, side=None, **kwargs): f'Function "{res.name}" does not support keyword arguments' ) - return res(*new_args, **kwargs) + return res(*new_args) else: @@ -777,12 +800,16 @@ def __len__(self) -> int: return len(self.expr) def parse(self): - """Parse an expression""" + """ + Parse an expression. + """ return self._visitor.visit(self.expr) @property def names(self): - """Get the names in an expression""" + """ + Get the names in an expression. + """ if is_term(self.terms): return frozenset([self.terms.name]) return frozenset(term.name for term in com.flatten(self.terms)) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index f62f03be9b732..eb9b880cd10d9 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -6,7 +6,7 @@ from pandas._libs import algos, lib from pandas._libs.tslibs import conversion -from pandas._typing import ArrayLike +from pandas._typing import ArrayLike, DtypeObj from pandas.core.dtypes.dtypes import ( CategoricalDtype, @@ -1668,7 +1668,7 @@ def _is_dtype(arr_or_dtype, condition) -> bool: return condition(dtype) -def _get_dtype(arr_or_dtype): +def _get_dtype(arr_or_dtype) -> DtypeObj: """ Get the dtype instance associated with an array or dtype object. @@ -1840,7 +1840,7 @@ def _validate_date_like_dtype(dtype) -> None: ) -def pandas_dtype(dtype): +def pandas_dtype(dtype) -> DtypeObj: """ Convert input into a pandas only dtype object or a numpy dtype object. diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index fb579f2f58a57..0bc754b3e8fb3 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -8,6 +8,7 @@ from pandas._libs import lib import pandas._libs.missing as libmissing from pandas._libs.tslibs import NaT, iNaT +from pandas._typing import DtypeObj from pandas.core.dtypes.common import ( _NS_DTYPE, @@ -585,7 +586,7 @@ def remove_na_arraylike(arr): return arr[notna(lib.values_from_object(arr))] -def is_valid_nat_for_dtype(obj, dtype) -> bool: +def is_valid_nat_for_dtype(obj, dtype: DtypeObj) -> bool: """ isna check that excludes incompatible dtypes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f3a0cf3841b5b..70e440b49ae6c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -607,7 +607,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: Check if full repr fits in horizontal boundaries imposed by the display options width and max_columns. - In case off non-interactive session, no boundaries apply. + In case of non-interactive session, no boundaries apply. `ignore_width` is here so ipnb+HTML output can behave the way users expect. display.max_columns remains in effect. @@ -874,8 +874,8 @@ def style(self) -> "Styler": polar bear 22000 koala marsupial 80000 >>> for label, content in df.items(): - ... print('label:', label) - ... print('content:', content, sep='\n') + ... print(f'label: {label}') + ... print(f'content: {content}', sep='\n') ... 
label: species content: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a2e348bf98e33..9bbad7b74e7d3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7206,7 +7206,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if isinstance(self, ABCSeries): threshold = self._constructor(threshold, index=self.index) else: - threshold = _align_method_FRAME(self, threshold, axis) + threshold = _align_method_FRAME(self, threshold, axis, flex=None)[1] return self.where(subset, threshold, axis=axis, inplace=inplace) def clip( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 02e9383314d36..71e7aafbca27d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -72,7 +72,7 @@ class providing the base-class of operations. _apply_docs = dict( template=""" - Apply function `func` group-wise and combine the results together. + Apply function `func` group-wise and combine the results together. The function passed to `apply` must take a {input} as its first argument and return a DataFrame, Series or scalar. `apply` will @@ -1360,17 +1360,17 @@ def groupby_function( @Substitution(name="groupby", f=name) @Appender(_common_see_also) @Appender(_local_template) - def f(self, **kwargs): - if "numeric_only" not in kwargs: - kwargs["numeric_only"] = numeric_only - if "min_count" not in kwargs: - kwargs["min_count"] = min_count - + def func(self, numeric_only=numeric_only, min_count=min_count): self._set_group_selection() # try a cython aggregation if we can try: - return self._cython_agg_general(alias, alt=npfunc, **kwargs) + return self._cython_agg_general( + how=alias, + alt=npfunc, + numeric_only=numeric_only, + min_count=min_count, + ) except DataError: pass except NotImplementedError as err: @@ -1385,9 +1385,9 @@ def f(self, **kwargs): result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) return result - set_function_name(f, name, cls) + set_function_name(func, name, cls) - return f + return func def first_compat(x, axis=0): def first(x): diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 2e95daa392976..77c54ec736aaa 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -350,7 +350,7 @@ def get_group_levels(self): def _is_builtin_func(self, arg): """ - if we define an builtin function for this argument, return it, + if we define a builtin function for this argument, return it, otherwise return the arg """ return SelectionMixin._builtin_table.get(arg, arg) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5274213f114e3..f5f793c507480 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,7 +1,7 @@ from datetime import datetime import operator from textwrap import dedent -from typing import Any, Dict, FrozenSet, Hashable, Optional, Union +from typing import Any, FrozenSet, Hashable, Optional, Union import warnings import numpy as np @@ -12,6 +12,7 @@ from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp from pandas._libs.tslibs.period import IncompatibleFrequency from pandas._libs.tslibs.timezones import tz_compare +from pandas._typing import Label from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -75,6 +76,7 @@ from pandas.core.strings import StringMethods from pandas.io.formats.printing import ( + PrettyDict, default_pprint, format_object_attrs, format_object_summary, @@ 
-243,7 +245,7 @@ def _outer_indexer(self, left, right): _typ = "index" _data: Union[ExtensionArray, np.ndarray] _id = None - _name: Optional[Hashable] = None + _name: Label = None # MultiIndex.levels previously allowed setting the index name. We # don't allow this anymore, and raise if it happens rather than # failing silently. @@ -3135,7 +3137,8 @@ def _convert_scalar_indexer(self, key, kind=None): assert kind in ["loc", "getitem", "iloc", None] if kind == "iloc": - return self._validate_indexer("positional", key, kind) + self._validate_indexer("positional", key, "iloc") + return key if len(self) and not isinstance(self, ABCMultiIndex): @@ -3144,11 +3147,11 @@ def _convert_scalar_indexer(self, key, kind=None): # or label indexing if we are using a type able # to be represented in the index - if kind in ["getitem"] and is_float(key): + if kind == "getitem" and is_float(key): if not self.is_floating(): self._invalid_indexer("label", key) - elif kind in ["loc"] and is_float(key): + elif kind == "loc" and is_float(key): # we want to raise KeyError on string/mixed here # technically we *could* raise a TypeError @@ -3162,7 +3165,7 @@ def _convert_scalar_indexer(self, key, kind=None): ]: self._invalid_indexer("label", key) - elif kind in ["loc"] and is_integer(key): + elif kind == "loc" and is_integer(key): if not self.holds_integer(): self._invalid_indexer("label", key) @@ -3188,11 +3191,10 @@ def _convert_slice_indexer(self, key: slice, kind=None): # validate iloc if kind == "iloc": - return slice( - self._validate_indexer("slice", key.start, kind), - self._validate_indexer("slice", key.stop, kind), - self._validate_indexer("slice", key.step, kind), - ) + self._validate_indexer("slice", key.start, "iloc") + self._validate_indexer("slice", key.stop, "iloc") + self._validate_indexer("slice", key.step, "iloc") + return key # potentially cast the bounds to integers start, stop, step = key.start, key.stop, key.step @@ -3213,11 +3215,10 @@ def is_int(v): integers """ if self.is_integer() or is_index_slice: - return slice( - self._validate_indexer("slice", key.start, kind), - self._validate_indexer("slice", key.stop, kind), - self._validate_indexer("slice", key.step, kind), - ) + self._validate_indexer("slice", key.start, "getitem") + self._validate_indexer("slice", key.stop, "getitem") + self._validate_indexer("slice", key.step, "getitem") + return key # convert the slice to an indexer here @@ -3347,7 +3348,7 @@ def _convert_list_indexer(self, keyarr, kind=None): return None - def _invalid_indexer(self, form, key): + def _invalid_indexer(self, form: str_t, key): """ Consistent invalid indexer message. """ @@ -4126,7 +4127,7 @@ def _assert_can_do_op(self, value): raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}") @property - def _has_complex_internals(self): + def _has_complex_internals(self) -> bool: """ Indicates if an index is not directly backed by a numpy array """ @@ -4783,7 +4784,7 @@ def _maybe_promote(self, other): return self.astype("object"), other.astype("object") return self, other - def groupby(self, values) -> Dict[Hashable, np.ndarray]: + def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]: """ Group the index labels by a given array of values. 
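The PrettyDict return type introduced just above is what lets Index.groupby (and hence GroupBy.groups) render an abbreviated mapping: its __repr__ routes through pandas' pprint_thing, which respects the display.max_seq_items option. A minimal standalone sketch of the idea — the fixed max_items cutoff here is a stand-in for that option, not pandas' actual implementation:

from typing import Dict, TypeVar

_KT = TypeVar("_KT")
_VT = TypeVar("_VT")


class PrettyDict(Dict[_KT, _VT]):
    """Dict whose repr truncates after max_items entries."""

    max_items = 4  # stand-in for pandas' display.max_seq_items option

    def __repr__(self) -> str:
        items = list(self.items())
        shown = ", ".join(f"{k!r}: {v!r}" for k, v in items[: self.max_items])
        if len(items) > self.max_items:
            shown += ", ..."
        return "{" + shown + "}"


d = PrettyDict((i, [i]) for i in range(10))
print(repr(d))  # {0: [0], 1: [1], 2: [2], 3: [3], ...}
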
@@ -4808,7 +4809,7 @@ def groupby(self, values) -> Dict[Hashable, np.ndarray]: # map to the label result = {k: self.take(v) for k, v in result.items()} - return result + return PrettyDict(result) def map(self, mapper, na_action=None): """ @@ -5005,20 +5006,19 @@ def _maybe_cast_indexer(self, key): pass return key - def _validate_indexer(self, form, key, kind: str_t): + def _validate_indexer(self, form: str_t, key, kind: str_t): """ If we are positional indexer, validate that we have appropriate typed bounds must be an integer. """ - assert kind in ["loc", "getitem", "iloc"] + assert kind in ["getitem", "iloc"] if key is None: pass elif is_integer(key): pass - elif kind in ["iloc", "getitem"]: + else: self._invalid_indexer(form, key) - return key _index_shared_docs[ "_maybe_cast_slice_bound" @@ -5043,7 +5043,7 @@ def _validate_indexer(self, form, key, kind: str_t): """ @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side: str_t, kind): assert kind in ["loc", "getitem", None] # We are a plain index here (sub-class override this method if they @@ -5074,7 +5074,7 @@ def _searchsorted_monotonic(self, label, side="left"): raise ValueError("index must be monotonic increasing or decreasing") - def get_slice_bound(self, label, side, kind) -> int: + def get_slice_bound(self, label, side: str_t, kind) -> int: """ Calculate slice bound that corresponds to given label. @@ -5259,7 +5259,7 @@ def insert(self, loc: int, item): idx = np.concatenate((_self[:loc], item, _self[loc:])) return self._shallow_copy_with_infer(idx) - def drop(self, labels, errors="raise"): + def drop(self, labels, errors: str_t = "raise"): """ Make new Index with passed list of labels deleted. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 1a53596fb5967..235d1856a2d0b 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -172,6 +172,7 @@ class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate): codes: np.ndarray categories: Index + _data: Categorical @property def _engine_type(self): @@ -312,7 +313,7 @@ def _is_dtype_compat(self, other) -> bool: return other - def equals(self, other): + def equals(self, other) -> bool: """ Determine if two CategoricalIndex objects contain the same elements. @@ -381,7 +382,7 @@ def values(self): return self._data @property - def _has_complex_internals(self): + def _has_complex_internals(self) -> bool: # used to avoid libreduction code paths, which raise or require conversion return True @@ -851,12 +852,12 @@ def _concat_same_dtype(self, to_concat, name): result.name = name return result - def _delegate_property_get(self, name, *args, **kwargs): + def _delegate_property_get(self, name: str, *args, **kwargs): """ method delegation to the ._values """ prop = getattr(self._values, name) return prop # no wrapping for now - def _delegate_method(self, name, *args, **kwargs): + def _delegate_method(self, name: str, *args, **kwargs): """ method delegation to the ._values """ method = getattr(self._values, name) if "inplace" in kwargs: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index b87dd0f02252f..0f385d9aba9c5 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -2,7 +2,7 @@ Base and utility classes for tseries type pandas objects. 
""" import operator -from typing import Any, List, Optional, Set +from typing import Any, List, Optional, Set, Union import numpy as np @@ -31,7 +31,7 @@ from pandas.core import algorithms from pandas.core.accessor import PandasDelegate -from pandas.core.arrays import DatetimeArray, ExtensionArray, TimedeltaArray +from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin from pandas.core.base import _shared_docs import pandas.core.indexes.base as ibase @@ -90,7 +90,7 @@ class DatetimeIndexOpsMixin(ExtensionIndex): Common ops mixin to support a unified interface datetimelike Index. """ - _data: ExtensionArray + _data: Union[DatetimeArray, TimedeltaArray, PeriodArray] freq: Optional[DateOffset] freqstr: Optional[str] _resolution: int @@ -468,7 +468,7 @@ def where(self, cond, other=None): result = np.where(cond, values, other).astype("i8") return self._shallow_copy(result) - def _summary(self, name=None): + def _summary(self, name=None) -> str: """ Return a summarized representation. @@ -955,7 +955,7 @@ class DatetimelikeDelegateMixin(PandasDelegate): _raw_methods: Set[str] = set() # raw_properties : dispatch properties that shouldn't be boxed in an Index _raw_properties: Set[str] = set() - _data: ExtensionArray + _data: Union[DatetimeArray, TimedeltaArray, PeriodArray] def _delegate_property_get(self, name, *args, **kwargs): result = getattr(self._data, name) @@ -963,7 +963,7 @@ def _delegate_property_get(self, name, *args, **kwargs): result = Index(result, name=self.name) return result - def _delegate_property_set(self, name, value, *args, **kwargs): + def _delegate_property_set(self, name: str, value, *args, **kwargs): setattr(self._data, name, value) def _delegate_method(self, name, *args, **kwargs): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3afd1ff35806d..2b4636155111f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -20,7 +20,6 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import is_valid_nat_for_dtype -from pandas.core.accessor import delegate_names from pandas.core.arrays.datetimes import ( DatetimeArray, tz_to_dtype, @@ -28,10 +27,7 @@ ) import pandas.core.common as com from pandas.core.indexes.base import Index, InvalidIndexError, maybe_extract_name -from pandas.core.indexes.datetimelike import ( - DatetimelikeDelegateMixin, - DatetimeTimedeltaMixin, -) +from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin from pandas.core.indexes.extension import inherit_names from pandas.core.ops import get_op_result_name import pandas.core.tools.datetimes as tools @@ -59,32 +55,13 @@ def _new_DatetimeIndex(cls, d): return result -class DatetimeDelegateMixin(DatetimelikeDelegateMixin): - # Most attrs are dispatched via datetimelike_{ops,methods} - # Some are "raw" methods, the result is not not re-boxed in an Index - # We also have a few "extra" attrs, which may or may not be raw, - # which we we dont' want to expose in the .dt accessor. 
- _extra_methods = ["to_period", "to_perioddelta", "to_julian_date", "strftime"] - _extra_raw_methods = [ - "to_pydatetime", - "_local_timestamps", - "_has_same_tz", - "_format_native_types", - "__iter__", - ] - _extra_raw_properties = ["_box_func", "tz", "tzinfo", "dtype"] - _delegated_properties = DatetimeArray._datetimelike_ops + _extra_raw_properties - _delegated_methods = ( - DatetimeArray._datetimelike_methods + _extra_methods + _extra_raw_methods - ) - _raw_properties = ( - {"date", "time", "timetz"} - | set(DatetimeArray._bool_ops) - | set(_extra_raw_properties) - ) - _raw_methods = set(_extra_raw_methods) - - +@inherit_names( + ["to_period", "to_perioddelta", "to_julian_date", "strftime"] + + DatetimeArray._field_ops + + DatetimeArray._datetimelike_methods, + DatetimeArray, + wrap=True, +) @inherit_names(["_timezone", "is_normalized", "_resolution"], DatetimeArray, cache=True) @inherit_names( [ @@ -93,19 +70,22 @@ class DatetimeDelegateMixin(DatetimelikeDelegateMixin): "_field_ops", "_datetimelike_ops", "_datetimelike_methods", - ], - DatetimeArray, -) -@delegate_names( - DatetimeArray, DatetimeDelegateMixin._delegated_properties, typ="property" -) -@delegate_names( + "_box_func", + "tz", + "tzinfo", + "dtype", + "to_pydatetime", + "_local_timestamps", + "_has_same_tz", + "_format_native_types", + "date", + "time", + "timetz", + ] + + DatetimeArray._bool_ops, DatetimeArray, - DatetimeDelegateMixin._delegated_methods, - typ="method", - overwrite=True, ) -class DatetimeIndex(DatetimeTimedeltaMixin, DatetimeDelegateMixin): +class DatetimeIndex(DatetimeTimedeltaMixin): """ Immutable ndarray of datetime64 data, represented internally as int64, and which can be boxed to Timestamp objects that are subclasses of datetime and @@ -218,6 +198,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin, DatetimeDelegateMixin): _is_numeric_dtype = False _infer_as_myclass = True + _data: DatetimeArray tz: Optional[tzinfo] # -------------------------------------------------------------------- @@ -489,7 +470,7 @@ def snap(self, freq="S"): dta = DatetimeArray(snapped, dtype=self.dtype) return DatetimeIndex._simple_new(dta, name=self.name) - def _parsed_string_to_bounds(self, reso, parsed): + def _parsed_string_to_bounds(self, reso: str, parsed: datetime): """ Calculate datetime bounds for parsed time string and its resolution. @@ -581,7 +562,7 @@ def _parsed_string_to_bounds(self, reso, parsed): return start, end def _partial_date_slice( - self, reso: str, parsed, use_lhs: bool = True, use_rhs: bool = True + self, reso: str, parsed: datetime, use_lhs: bool = True, use_rhs: bool = True ): """ Parameters @@ -698,7 +679,7 @@ def get_loc(self, key, method=None, tolerance=None): return Index.get_loc(self, key, method, tolerance) - def _maybe_cast_for_get_loc(self, key): + def _maybe_cast_for_get_loc(self, key) -> Timestamp: # needed to localize naive datetimes key = Timestamp(key) if key.tzinfo is None: @@ -707,7 +688,7 @@ def _maybe_cast_for_get_loc(self, key): key = key.tz_convert(self.tz) return key - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side: str, kind): """ If label is a string, cast it to datetime according to resolution. 
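The @inherit_names decorator applied above replaces the DatetimeDelegateMixin/delegate_names machinery being deleted: rather than registering delegates through a mixin, it copies the named attributes from the backing array class onto the index class at definition time, with wrap=True re-boxing results in the index. A rough sketch of the pattern under simplified assumptions (methods only, with a hypothetical FakeArray/FakeIndex pair; the real decorator in pandas.core.indexes.extension also handles properties and caching):

def inherit_names(names, delegate, wrap: bool = False):
    """Pin methods named in `names` from `delegate` onto the decorated class."""

    def make_method(name):
        def method(self, *args, **kwargs):
            result = getattr(self._data, name)(*args, **kwargs)
            # wrap=True re-boxes array results in a new instance of the index class
            return type(self)(result) if wrap else result

        method.__name__ = name
        method.__doc__ = getattr(delegate, name).__doc__
        return method

    def wrapper(cls):
        for name in names:
            setattr(cls, name, make_method(name))
        return cls

    return wrapper


class FakeArray:  # hypothetical stand-in for DatetimeArray
    def normalize(self):
        return FakeArray()


@inherit_names(["normalize"], FakeArray, wrap=True)
class FakeIndex:
    def __init__(self, data=None):
        self._data = data if data is not None else FakeArray()


isinstance(FakeIndex().normalize(), FakeIndex)  # True: result was re-boxed
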
diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index d5664d760114e..66b551f654bf1 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -112,7 +112,7 @@ def wrapper(cls): return wrapper -def _make_wrapped_comparison_op(opname): +def _make_wrapped_comparison_op(opname: str): """ Create a comparison method that dispatches to ``._data``. """ @@ -132,7 +132,7 @@ def wrapper(self, other): return wrapper -def make_wrapped_arith_op(opname): +def make_wrapped_arith_op(opname: str): def method(self, other): if ( isinstance(other, Index) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 26b64836172fd..fd812b17fb37c 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -216,6 +216,7 @@ class IntervalIndex(IntervalMixin, ExtensionIndex): # Immutable, so we are able to cache computations like isna in '_mask' _mask = None + _data: IntervalArray # -------------------------------------------------------------------- # Constructors @@ -394,18 +395,18 @@ def __contains__(self, key: Any) -> bool: return False @cache_readonly - def _multiindex(self): + def _multiindex(self) -> MultiIndex: return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"]) @cache_readonly - def values(self): + def values(self) -> IntervalArray: """ Return the IntervalIndex's data as an IntervalArray. """ return self._data @property - def _has_complex_internals(self): + def _has_complex_internals(self) -> bool: # used to avoid libreduction code paths, which raise or require conversion return True diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5a9825d58b204..02db7be1ddf41 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1347,7 +1347,7 @@ def values(self): return self._tuples @property - def _has_complex_internals(self): + def _has_complex_internals(self) -> bool: # used to avoid libreduction code paths, which raise or require conversion return True diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index aece294edc3e3..f7af82920adb1 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -393,7 +393,7 @@ def _convert_scalar_indexer(self, key, kind=None): assert kind in ["loc", "getitem", "iloc", None] if kind == "iloc": - return self._validate_indexer("positional", key, kind) + self._validate_indexer("positional", key, "iloc") return key diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1e18c16d02784..4438573cb9067 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -24,7 +24,6 @@ pandas_dtype, ) -from pandas.core.accessor import delegate_names from pandas.core.arrays.period import ( PeriodArray, period_array, @@ -39,11 +38,9 @@ ensure_index, maybe_extract_name, ) -from pandas.core.indexes.datetimelike import ( - DatetimeIndexOpsMixin, - DatetimelikeDelegateMixin, -) +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.indexes.datetimes import DatetimeIndex, Index +from pandas.core.indexes.extension import inherit_names from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name from pandas.core.tools.datetimes import DateParseError @@ -71,23 +68,14 @@ def _new_PeriodIndex(cls, **d): return cls(values, **d) -class PeriodDelegateMixin(DatetimelikeDelegateMixin): - """ - Delegate from PeriodIndex to PeriodArray. 
- """ - - _raw_methods = {"_format_native_types"} - _raw_properties = {"is_leap_year", "freq"} - - _delegated_properties = PeriodArray._datetimelike_ops + list(_raw_properties) - _delegated_methods = set(PeriodArray._datetimelike_methods) | _raw_methods - - -@delegate_names(PeriodArray, PeriodDelegateMixin._delegated_properties, typ="property") -@delegate_names( - PeriodArray, PeriodDelegateMixin._delegated_methods, typ="method", overwrite=True +@inherit_names( + ["strftime", "to_timestamp", "asfreq", "start_time", "end_time"] + + PeriodArray._field_ops, + PeriodArray, + wrap=True, ) -class PeriodIndex(DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): +@inherit_names(["is_leap_year", "freq", "_format_native_types"], PeriodArray) +class PeriodIndex(DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in time. @@ -383,7 +371,7 @@ def __contains__(self, key: Any) -> bool: return False @cache_readonly - def _int64index(self): + def _int64index(self) -> Int64Index: return Int64Index._simple_new(self.asi8, name=self.name) # ------------------------------------------------------------------------ @@ -606,7 +594,7 @@ def get_loc(self, key, method=None, tolerance=None): except KeyError: raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side: str, kind: str): """ If label is a string or a datetime, cast it to Period.ordinal according to resolution. @@ -810,7 +798,7 @@ def _union(self, other, sort): # ------------------------------------------------------------------------ - def _apply_meta(self, rawarr): + def _apply_meta(self, rawarr) -> "PeriodIndex": if not isinstance(rawarr, PeriodIndex): if not isinstance(rawarr, PeriodArray): rawarr = PeriodArray(rawarr, freq=self.freq) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 22940f851ddb0..340397b69c624 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -400,7 +400,7 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs): name = self.name return self.from_range(self._range, name=name) - def _minmax(self, meth): + def _minmax(self, meth: str): no_steps = len(self) - 1 if no_steps == -1: return np.nan @@ -409,13 +409,13 @@ def _minmax(self, meth): return self.start + self.step * no_steps - def min(self, axis=None, skipna=True, *args, **kwargs): + def min(self, axis=None, skipna=True, *args, **kwargs) -> int: """The minimum value of the RangeIndex""" nv.validate_minmax_axis(axis) nv.validate_min(args, kwargs) return self._minmax("min") - def max(self, axis=None, skipna=True, *args, **kwargs): + def max(self, axis=None, skipna=True, *args, **kwargs) -> int: """The maximum value of the RangeIndex""" nv.validate_minmax_axis(axis) nv.validate_max(args, kwargs) @@ -519,12 +519,12 @@ def intersection(self, other, sort=False): new_index = new_index.sort_values() return new_index - def _min_fitting_element(self, lower_limit): + def _min_fitting_element(self, lower_limit: int) -> int: """Returns the smallest element greater than or equal to the limit""" no_steps = -(-(lower_limit - self.start) // abs(self.step)) return self.start + abs(self.step) * no_steps - def _max_fitting_element(self, upper_limit): + def _max_fitting_element(self, upper_limit: int) -> int: """Returns the largest element smaller than or equal to the limit""" no_steps = (upper_limit - self.start) // abs(self.step) return self.start + abs(self.step) * no_steps diff --git 
a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 1257e410b4125..8691f0a2a1178 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -16,7 +16,6 @@ ) from pandas.core.dtypes.missing import is_valid_nat_for_dtype -from pandas.core.accessor import delegate_names from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.timedeltas import TimedeltaArray import pandas.core.common as com @@ -28,7 +27,6 @@ ) from pandas.core.indexes.datetimelike import ( DatetimeIndexOpsMixin, - DatetimelikeDelegateMixin, DatetimeTimedeltaMixin, ) from pandas.core.indexes.extension import inherit_names @@ -36,20 +34,20 @@ from pandas.tseries.frequencies import to_offset -class TimedeltaDelegateMixin(DatetimelikeDelegateMixin): - # Most attrs are dispatched via datetimelike_{ops,methods} - # Some are "raw" methods, the result is not re-boxed in an Index - # We also have a few "extra" attrs, which may or may not be raw, - # which we don't want to expose in the .dt accessor. - _raw_properties = {"components", "_box_func"} - _raw_methods = {"to_pytimedelta", "sum", "std", "median", "_format_native_types"} - - _delegated_properties = TimedeltaArray._datetimelike_ops + list(_raw_properties) - _delegated_methods = TimedeltaArray._datetimelike_methods + list(_raw_methods) - - @inherit_names( - ["_box_values", "__neg__", "__pos__", "__abs__"], TimedeltaArray, wrap=True + [ + "_box_values", + "__neg__", + "__pos__", + "__abs__", + "total_seconds", + "round", + "floor", + "ceil", + ] + + TimedeltaArray._field_ops, + TimedeltaArray, + wrap=True, ) @inherit_names( [ @@ -59,21 +57,18 @@ class TimedeltaDelegateMixin(DatetimelikeDelegateMixin): "_datetimelike_ops", "_datetimelike_methods", "_other_ops", + "components", + "_box_func", + "to_pytimedelta", + "sum", + "std", + "median", + "_format_native_types", + "freq", ], TimedeltaArray, ) -@delegate_names( - TimedeltaArray, TimedeltaDelegateMixin._delegated_properties, typ="property" -) -@delegate_names( - TimedeltaArray, - TimedeltaDelegateMixin._delegated_methods, - typ="method", - overwrite=True, -) -class TimedeltaIndex( - DatetimeTimedeltaMixin, dtl.TimelikeOps, TimedeltaDelegateMixin, -): +class TimedeltaIndex(DatetimeTimedeltaMixin, dtl.TimelikeOps): """ Immutable ndarray of timedelta64 data, represented internally as int64, and which can be boxed to timedelta objects. @@ -135,6 +130,8 @@ class TimedeltaIndex( _is_numeric_dtype = True _infer_as_myclass = True + _data: TimedeltaArray + # ------------------------------------------------------------------- # Constructors @@ -277,7 +274,7 @@ def get_loc(self, key, method=None, tolerance=None): return Index.get_loc(self, key, method, tolerance) - def _maybe_cast_slice_bound(self, label, side, kind): + def _maybe_cast_slice_bound(self, label, side: str, kind): """ If label is a string, cast it to timedelta according to resolution. 
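Separate from the delegation cleanup, the RangeIndex annotations a couple of hunks back rely on the fact that a range's extrema are pure arithmetic: with no_steps = len(self) - 1, the last element is start + step * no_steps, so min/max never materialize the index. A small check of that identity, using the built-in range as a stand-in for RangeIndex:

def range_minmax(start: int, stop: int, step: int) -> tuple:
    """Min/max of range(start, stop, step) without materializing it."""
    rng = range(start, stop, step)
    if not rng:
        raise ValueError("empty range (pandas returns NaN here)")
    last = start + step * (len(rng) - 1)
    # ascending: min is start; descending: min is the final element
    return (start, last) if step > 0 else (last, start)


assert range_minmax(0, 10, 3) == (min(range(0, 10, 3)), max(range(0, 10, 3))) == (0, 9)
assert range_minmax(10, 0, -2) == (2, 10)
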
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6a679708206fc..7e56148b7569e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2124,7 +2124,7 @@ def _convert_key(self, key, is_setter: bool = False): "can only have integer indexers" ) else: - if is_integer(i) and not ax.holds_integer(): + if is_integer(i) and not (ax.holds_integer() or ax.is_floating()): raise ValueError( "At based indexing on an non-integer " "index can only have non-integer " diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 76e90a26874fc..6d2253c5dc87d 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -5,12 +5,13 @@ """ import datetime import operator -from typing import Set, Tuple, Union +from typing import Optional, Set, Tuple, Union import numpy as np from pandas._libs import Timedelta, Timestamp, lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401 +from pandas._typing import Level from pandas.util._decorators import Appender from pandas.core.dtypes.common import is_list_like, is_timedelta64_dtype @@ -615,8 +616,27 @@ def _combine_series_frame(left, right, func, axis: int): return left._construct_result(new_data) -def _align_method_FRAME(left, right, axis): - """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ +def _align_method_FRAME( + left, right, axis, flex: Optional[bool] = False, level: Level = None +): + """ + Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. + + Parameters + ---------- + left : DataFrame + right : Any + axis: int, str, or None + flex: bool or None, default False + Whether this is a flex op, in which case we reindex. + None indicates not to check for alignment. + level : int or level name, default None + + Returns + ------- + left : DataFrame + right : Any + """ def to_series(right): msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}" @@ -667,7 +687,22 @@ def to_series(right): # GH17901 right = to_series(right) - return right + if flex is not None and isinstance(right, ABCDataFrame): + if not left._indexed_same(right): + if flex: + left, right = left.align(right, join="outer", level=level, copy=False) + else: + raise ValueError( + "Can only compare identically-labeled DataFrame objects" + ) + elif isinstance(right, ABCSeries): + # axis=1 is default for DataFrame-with-Series op + axis = left._get_axis_number(axis) if axis is not None else 1 + left, right = left.align( + right, join="outer", axis=axis, level=level, copy=False + ) + + return left, right def _arith_method_FRAME(cls, op, special): @@ -687,16 +722,15 @@ def _arith_method_FRAME(cls, op, special): @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): - other = _align_method_FRAME(self, other, axis) + self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): # Another DataFrame pass_op = op if should_series_dispatch(self, other, op) else na_op pass_op = pass_op if not is_logical else op - left, right = self.align(other, join="outer", level=level, copy=False) - new_data = left._combine_frame(right, pass_op, fill_value) - return left._construct_result(new_data) + new_data = self._combine_frame(other, pass_op, fill_value) + return self._construct_result(new_data) elif isinstance(other, ABCSeries): # For these values of `axis`, we end up dispatching to Series op, @@ -708,9 +742,6 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): raise 
NotImplementedError(f"fill_value {fill_value} not supported.") axis = self._get_axis_number(axis) if axis is not None else 1 - self, other = self.align( - other, join="outer", axis=axis, level=level, copy=False - ) return _combine_series_frame(self, other, pass_op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` @@ -737,20 +768,15 @@ def _flex_comp_method_FRAME(cls, op, special): @Appender(doc) def f(self, other, axis=default_axis, level=None): - other = _align_method_FRAME(self, other, axis) + self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): # Another DataFrame - if not self._indexed_same(other): - self, other = self.align(other, "outer", level=level, copy=False) new_data = dispatch_to_series(self, other, op, str_rep) return self._construct_result(new_data) elif isinstance(other, ABCSeries): axis = self._get_axis_number(axis) if axis is not None else 1 - self, other = self.align( - other, join="outer", axis=axis, level=level, copy=False - ) return _combine_series_frame(self, other, op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` @@ -769,21 +795,15 @@ def _comp_method_FRAME(cls, op, special): @Appender(f"Wrapper for comparison method {op_name}") def f(self, other): - other = _align_method_FRAME(self, other, axis=None) + self, other = _align_method_FRAME( + self, other, axis=None, level=None, flex=False + ) if isinstance(other, ABCDataFrame): # Another DataFrame - if not self._indexed_same(other): - raise ValueError( - "Can only compare identically-labeled DataFrame objects" - ) new_data = dispatch_to_series(self, other, op, str_rep) elif isinstance(other, ABCSeries): - # axis=1 is default for DataFrame-with-Series op - self, other = self.align( - other, join="outer", axis=1, level=None, copy=False - ) new_data = dispatch_to_series(self, other, op, axis="columns") else: diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index fab9f41cb6c4f..f00ff0d4ba5ed 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1,6 +1,6 @@ from functools import partial import itertools -from typing import List +from typing import List, Optional, Union import numpy as np @@ -375,6 +375,7 @@ def _unstack_multiple(data, clocs, fill_value=None): unstcols = unstacked.index else: unstcols = unstacked.columns + assert isinstance(unstcols, MultiIndex) # for mypy new_levels = [unstcols.levels[0]] + clevels new_names = [data.columns.name] + cnames @@ -433,15 +434,14 @@ def _unstack_frame(obj, level, fill_value=None): blocks = obj._data.unstack(unstacker, fill_value=fill_value) return obj._constructor(blocks) else: - unstacker = _Unstacker( + return _Unstacker( obj.values, obj.index, level=level, value_columns=obj.columns, fill_value=fill_value, constructor=obj._constructor, - ) - return unstacker.get_result() + ).get_result() def _unstack_extension_series(series, level, fill_value): @@ -902,9 +902,10 @@ def check_len(item, name): elif isinstance(prefix_sep, dict): prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] + with_dummies: List[DataFrame] if data_to_encode.shape == data.shape: # Encoding the entire df, do not prepend any dropped columns - with_dummies: List[DataFrame] = [] + with_dummies = [] elif columns is not None: # Encoding only cols specified in columns. Get all cols not in # columns to prepend to result. 
@@ -994,6 +995,7 @@ def _make_col_name(prefix, prefix_sep, level) -> str: dummy_cols = [_make_col_name(prefix, prefix_sep, level) for level in levels] + index: Optional[Index] if isinstance(data, Series): index = data.index else: @@ -1001,6 +1003,7 @@ def _make_col_name(prefix, prefix_sep, level) -> str: if sparse: + fill_value: Union[bool, float, int] if is_integer_dtype(dtype): fill_value = 0 elif dtype == bool: @@ -1010,7 +1013,7 @@ def _make_col_name(prefix, prefix_sep, level) -> str: sparse_series = [] N = len(data) - sp_indices = [[] for _ in range(len(dummy_cols))] + sp_indices: List[List] = [[] for _ in range(len(dummy_cols))] mask = codes != -1 codes = codes[mask] n_idx = np.arange(N)[mask] diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index df6a38000452d..0d96c8c4acdb8 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -8,11 +8,12 @@ class _PyxlsbReader(_BaseExcelReader): def __init__(self, filepath_or_buffer: FilePathOrBuffer): - """Reader using pyxlsb engine. + """ + Reader using pyxlsb engine. Parameters - __________ - filepath_or_buffer: string, path object, or Workbook + ---------- + filepath_or_buffer: str, path object, or Workbook Object to be parsed. """ import_optional_dependency("pyxlsb") @@ -29,7 +30,7 @@ def _workbook_class(self): def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): from pyxlsb import open_workbook - # Todo: hack in buffer capability + # TODO: hack in buffer capability # This might need some modifications to the Pyxlsb library # Actual work for opening it is in xlsbpackage.py, line 20-ish @@ -48,7 +49,7 @@ def get_sheet_by_index(self, index: int): return self.book.get_sheet(index + 1) def _convert_cell(self, cell, convert_float: bool) -> Scalar: - # Todo: there is no way to distinguish between floats and datetimes in pyxlsb + # TODO: there is no way to distinguish between floats and datetimes in pyxlsb # This means that there is no way to read datetime types from an xlsb file yet if cell.v is None: return "" # Prevents non-named columns from not showing up as Unnamed: i diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 93c7ff524ecd4..13b18a0b5fb6f 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -6,12 +6,14 @@ from typing import ( Any, Callable, + Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, + TypeVar, Union, ) @@ -20,6 +22,8 @@ from pandas.core.dtypes.inference import is_sequence EscapeChars = Union[Mapping[str, str], Iterable[str]] +_KT = TypeVar("_KT") +_VT = TypeVar("_VT") def adjoin(space: int, *lists: List[str], **kwargs) -> str: @@ -528,3 +532,10 @@ def format_object_attrs( if len(obj) > max_seq_items: attrs.append(("length", len(obj))) return attrs + + +class PrettyDict(Dict[_KT, _VT]): + """Dict extension to support abbreviated __repr__""" + + def __repr__(self) -> str: + return pprint_thing(self) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 55f1216a0efd7..162f3c114fa5d 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -864,10 +864,10 @@ def test_alignment_non_pandas(self): ]: tm.assert_series_equal( - align(df, val, "index"), Series([1, 2, 3], index=df.index) + align(df, val, "index")[1], Series([1, 2, 3], index=df.index) ) tm.assert_series_equal( - align(df, val, "columns"), Series([1, 2, 3], index=df.columns) + align(df, val, "columns")[1], Series([1, 2, 3], index=df.columns) ) # length mismatch @@ -882,10 
+882,11 @@ def test_alignment_non_pandas(self): val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) tm.assert_frame_equal( - align(df, val, "index"), DataFrame(val, index=df.index, columns=df.columns) + align(df, val, "index")[1], + DataFrame(val, index=df.index, columns=df.columns), ) tm.assert_frame_equal( - align(df, val, "columns"), + align(df, val, "columns")[1], DataFrame(val, index=df.index, columns=df.columns), ) diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py index 8901af7a90acc..ebac36c5f8c78 100644 --- a/pandas/tests/groupby/conftest.py +++ b/pandas/tests/groupby/conftest.py @@ -112,7 +112,7 @@ def reduction_func(request): return request.param -@pytest.fixture(params=transformation_kernels) +@pytest.fixture(params=sorted(transformation_kernels)) def transformation_func(request): """yields the string names of all groupby transformation functions.""" return request.param diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index eb9552fbbebc1..b7d7124a3a5e5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2037,3 +2037,23 @@ def test_groupby_list_level(): expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3)) result = expected.groupby(level=[0]).mean() tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "max_seq_items, expected", + [ + (5, "{0: [0], 1: [1], 2: [2], 3: [3], 4: [4]}"), + (4, "{0: [0], 1: [1], 2: [2], 3: [3], ...}"), + ], +) +def test_groups_repr_truncates(max_seq_items, expected): + # GH 1135 + df = pd.DataFrame(np.random.randn(5, 1)) + df["a"] = df.index + + with pd.option_context("display.max_seq_items", max_seq_items): + result = df.groupby("a").groups.__repr__() + assert result == expected + + result = df.groupby(np.array(df.a)).groups.__repr__() + assert result == expected diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index b83ceb1ce699c..992a91ad8a528 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -402,7 +402,7 @@ def test_get_loc_missing_nan(self): ) def test_lookups_datetimelike_values(self, vals): # If we have datetime64 or timedelta64 values, make sure they are - # wrappped correctly + # wrappped correctly GH#31163 ser = pd.Series(vals, index=range(3, 6)) ser.index = ser.index.astype("float64") @@ -425,7 +425,9 @@ def test_lookups_datetimelike_values(self, vals): result = ser.at[4.0] assert isinstance(result, type(expected)) and result == expected - # Note: ser.at[4] raises ValueError; TODO: should we make this match loc? 
+ # GH#31329 .at[4] should cast to 4.0, matching .loc behavior + result = ser.at[4] + assert isinstance(result, type(expected)) and result == expected result = ser.iloc[1] assert isinstance(result, type(expected)) and result == expected diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index f25291f4aef12..6113cfec48df9 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -86,9 +86,7 @@ def test_basic_types(self, sparse, dtype): result = get_dummies(s_df, columns=s_df.columns, sparse=sparse, dtype=dtype) if sparse: - dtype_name = "Sparse[{}, {}]".format( - self.effective_dtype(dtype).name, fill_value - ) + dtype_name = f"Sparse[{self.effective_dtype(dtype).name}, {fill_value}]" else: dtype_name = self.effective_dtype(dtype).name @@ -163,8 +161,7 @@ def test_unicode(self, sparse): s = [e, eacute, eacute] res = get_dummies(s, prefix="letter", sparse=sparse) exp = DataFrame( - {"letter_e": [1, 0, 0], "letter_{eacute}".format(eacute=eacute): [0, 1, 1]}, - dtype=np.uint8, + {"letter_e": [1, 0, 0], f"letter_{eacute}": [0, 1, 1]}, dtype=np.uint8, ) if sparse: exp = exp.apply(SparseArray, fill_value=0) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 6af9c9884589c..bbc81e0dbb6e6 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -308,7 +308,7 @@ def test_multiples(self): @pytest.mark.parametrize("month", MONTHS) def test_period_cons_quarterly(self, month): # bugs in scikits.timeseries - freq = "Q-{month}".format(month=month) + freq = f"Q-{month}" exp = Period("1989Q3", freq=freq) assert "1989Q3" in str(exp) stamp = exp.to_timestamp("D", how="end") @@ -322,7 +322,7 @@ def test_period_cons_quarterly(self, month): @pytest.mark.parametrize("month", MONTHS) def test_period_cons_annual(self, month): # bugs in scikits.timeseries - freq = "A-{month}".format(month=month) + freq = f"A-{month}" exp = Period("1989", freq=freq) stamp = exp.to_timestamp("D", how="end") + timedelta(days=30) p = Period(stamp, freq=freq) @@ -333,8 +333,8 @@ def test_period_cons_annual(self, month): @pytest.mark.parametrize("day", DAYS) @pytest.mark.parametrize("num", range(10, 17)) def test_period_cons_weekly(self, num, day): - daystr = "2011-02-{num}".format(num=num) - freq = "W-{day}".format(day=day) + daystr = f"2011-02-{num}" + freq = f"W-{day}" result = Period(daystr, freq=freq) expected = Period(daystr, freq="D").asfreq(freq) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index d8804994af426..cd7fdd55a4d2c 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -77,8 +77,8 @@ def safe_import(mod_name: str, min_version: Optional[str] = None): # TODO: -# remove when gh-24839 is fixed; this affects numpy 1.16 -# and pytables 3.4.4 +# remove when gh-24839 is fixed. +# this affects numpy 1.16 and pytables 3.4.4 tables = safe_import("tables") xfail_non_writeable = pytest.mark.xfail( tables @@ -86,7 +86,7 @@ def safe_import(mod_name: str, min_version: Optional[str] = None): and LooseVersion(tables.__version__) < LooseVersion("3.5.1"), reason=( "gh-25511, gh-24839. 
pytables needs a " - "release beyong 3.4.4 to support numpy 1.16x" + "release beyond 3.4.4 to support numpy 1.16.x" ), ) diff --git a/setup.cfg b/setup.cfg index 98ad5207d44a7..cf931f52489a8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -240,9 +240,6 @@ check_untyped_defs=False [mypy-pandas.core.reshape.merge] check_untyped_defs=False -[mypy-pandas.core.reshape.reshape] -check_untyped_defs=False - [mypy-pandas.core.strings] check_untyped_defs=False
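Tying the indexing fix and its test together: with this patch applied, Series.at casts an integer key against a Float64Index the same way .loc does (GH#31329), so the scalar-access paths agree. A short usage sketch of the intended behavior (requires a pandas build that includes this change):

import pandas as pd

ser = pd.Series([10, 20, 30], index=[3.0, 4.0, 5.0])  # Float64Index

# Before the fix, ser.at[4] raised while ser.loc[4] returned 20;
# now both cast the integer key to 4.0 and agree.
assert ser.loc[4] == 20
assert ser.at[4] == 20
assert ser.at[4] == ser.at[4.0]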