From b949bb1b787dd1a37fc79837c7dfa18421521957 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 17 Jul 2020 18:59:07 +0100 Subject: [PATCH 1/5] CLN: consistent signatures for equals methods --- pandas/_libs/sparse.pyx | 4 ++-- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/categorical.py | 4 ++-- pandas/core/generic.py | 2 +- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/range.py | 2 +- pandas/core/internals/managers.py | 14 ++++++++++++-- 10 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 7c9575d921dc9..d79db86685ac7 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -94,7 +94,7 @@ cdef class IntIndex(SparseIndex): if not monotonic: raise ValueError("Indices must be strictly increasing") - def equals(self, other) -> bool: + def equals(self, other: Any) -> bool: if not isinstance(other, IntIndex): return False @@ -390,7 +390,7 @@ cdef class BlockIndex(SparseIndex): if blengths[i] == 0: raise ValueError(f'Zero-length block {i}') - def equals(self, other) -> bool: + def equals(self, other: Any) -> bool: if not isinstance(other, BlockIndex): return False diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 2553a65aed07b..55af2fc8281b7 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -742,7 +742,7 @@ def searchsorted(self, value, side="left", sorter=None): arr = self.astype(object) return arr.searchsorted(value, side=side, sorter=sorter) - def equals(self, other: "ExtensionArray") -> bool: + def equals(self, other: Any) -> bool: """ Return if another array is equivalent to this array. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index db9cfd9d7fc59..607dc91298e81 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2,7 +2,7 @@ from functools import partial import operator from shutil import get_terminal_size -from typing import Dict, Hashable, List, Type, Union, cast +from typing import Any, Dict, Hashable, List, Type, Union, cast from warnings import warn import numpy as np @@ -2242,7 +2242,7 @@ def _from_factorized(cls, uniques, original): original.categories.take(uniques), dtype=original.dtype ) - def equals(self, other): + def equals(self, other: Any) -> bool: """ Returns True if categorical arrays are equal. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e46fde1f59f16..0eb235b110ca6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1195,7 +1195,7 @@ def _indexed_same(self, other) -> bool: self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS ) - def equals(self, other): + def equals(self, other: Any) -> bool: """ Test whether two objects contain the same elements. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b0b008de69a94..5d5cb54a3bab8 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -290,7 +290,7 @@ def _is_dtype_compat(self, other) -> bool: return other - def equals(self, other) -> bool: + def equals(self, other: Any) -> bool: """ Determine if two CategoricalIndex objects contain the same elements. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 15a7e25238983..65b3eb4b9eb2b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -126,7 +126,7 @@ def __array_wrap__(self, result, context=None): # ------------------------------------------------------------------------ - def equals(self, other) -> bool: + def equals(self, other: Any) -> bool: """ Determines if two Index objects contain the same elements. """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 9548ebbd9c3b2..ab9d42f444baa 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1005,7 +1005,7 @@ def _format_space(self) -> str: def argsort(self, *args, **kwargs) -> np.ndarray: return np.lexsort((self.right, self.left)) - def equals(self, other) -> bool: + def equals(self, other: Any) -> bool: """ Determines if two IntervalIndex objects contain the same elements. """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 235da89083d0a..a9676fe656cba 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3204,7 +3204,7 @@ def truncate(self, before=None, after=None): verify_integrity=False, ) - def equals(self, other) -> bool: + def equals(self, other: Any) -> bool: """ Determines if two MultiIndex objects have the same labeling information (the levels themselves do not necessarily have to be the same) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e5e98039ff77b..c0f55991610da 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -445,7 +445,7 @@ def argsort(self, *args, **kwargs) -> np.ndarray: else: return np.arange(len(self) - 1, -1, -1) - def equals(self, other) -> bool: + def equals(self, other: Any) -> bool: """ Determines if two Index objects contain the same elements. """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d5947726af7fd..4c80e04bd1479 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2,7 +2,17 @@ import itertools import operator import re -from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import ( + Any, + DefaultDict, + Dict, + List, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) import warnings import numpy as np @@ -1415,7 +1425,7 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True ) - def equals(self, other: "BlockManager") -> bool: + def equals(self, other: Any) -> bool: self_axes, other_axes = self.axes, other.axes if len(self_axes) != len(other_axes): return False From 015e2b72b747c854062d2d776732190442aa1f73 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 8 Aug 2020 18:56:15 +0100 Subject: [PATCH 2/5] Any > object for method body checking --- pandas/_libs/sparse.pyx | 4 ++-- pandas/core/arrays/base.py | 7 ++++--- pandas/core/arrays/categorical.py | 8 +++++--- pandas/core/generic.py | 4 +++- pandas/core/indexes/base.py | 3 ++- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 6 +++--- pandas/core/indexes/interval.py | 7 ++++--- pandas/core/indexes/multi.py | 11 +++++------ pandas/core/indexes/range.py | 2 +- pandas/core/internals/managers.py | 17 +++++------------ pandas/tests/plotting/common.py | 1 + pandas/tests/series/indexing/test_datetime.py | 1 + 13 files changed, 37 insertions(+), 36 deletions(-) diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 7bcf25e0c5a43..0c3d8915b749b 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -103,7 +103,7 @@ cdef class IntIndex(SparseIndex): if not monotonic: raise ValueError("Indices must be strictly increasing") - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: if not isinstance(other, IntIndex): return False @@ -399,7 +399,7 @@ cdef class BlockIndex(SparseIndex): if blengths[i] == 0: raise ValueError(f'Zero-length block {i}') - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: if not isinstance(other, BlockIndex): return False diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 0c15d71d99105..c203b2f30e07c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -7,7 +7,7 @@ without warning. """ import operator -from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union, cast import numpy as np @@ -742,7 +742,7 @@ def searchsorted(self, value, side="left", sorter=None): arr = self.astype(object) return arr.searchsorted(value, side=side, sorter=sorter) - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """ Return if another array is equivalent to this array. @@ -762,7 +762,8 @@ def equals(self, other: Any) -> bool: """ if not type(self) == type(other): return False - elif not self.dtype == other.dtype: + other = cast(ExtensionArray, other) + if not self.dtype == other.dtype: return False elif not len(self) == len(other): return False diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 1965ea21bcb35..a28b341669918 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2,7 +2,7 @@ from functools import partial import operator from shutil import get_terminal_size -from typing import Any, Dict, Hashable, List, Type, Union, cast +from typing import Dict, Hashable, List, Type, Union, cast from warnings import warn import numpy as np @@ -2242,7 +2242,7 @@ def _from_factorized(cls, uniques, original): original.categories.take(uniques), dtype=original.dtype ) - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """ Returns True if categorical arrays are equal. @@ -2254,7 +2254,9 @@ def equals(self, other: Any) -> bool: ------- bool """ - if self.is_dtype_equal(other): + if not isinstance(other, Categorical): + return False + elif self.is_dtype_equal(other): if self.categories.equals(other.categories): # fastpath to avoid re-coding other_codes = other._codes diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ef1296309d0ce..6546621e28a65 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -22,6 +22,7 @@ Tuple, Type, Union, + cast, ) import warnings import weakref @@ -1195,7 +1196,7 @@ def _indexed_same(self, other) -> bool: self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS ) - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """ Test whether two objects contain the same elements. @@ -1275,6 +1276,7 @@ def equals(self, other: Any) -> bool: """ if not (isinstance(other, type(self)) or isinstance(self, type(other))): return False + other = cast(NDFrame, other) return self._mgr.equals(other._mgr) # ------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bfdfbd35f27ad..91364da4f9327 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4168,7 +4168,7 @@ def putmask(self, mask, value): # coerces to object return self.astype(object).putmask(mask, value) - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """ Determine if two Index object are equal. @@ -4242,6 +4242,7 @@ def equals(self, other: Any) -> bool: and type(other) is not type(self) and other.equals is not self.equals ): + breakpoint() return other.equals(self) return array_equivalent(self._values, other._values) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 295fa4d163400..4990e6a8e20e9 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -290,7 +290,7 @@ def _is_dtype_compat(self, other) -> bool: return other - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """ Determine if two CategoricalIndex objects contain the same elements. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d8e743e034c12..47cbe11eac3ee 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -24,7 +24,7 @@ is_scalar, ) from pandas.core.dtypes.concat import concat_compat -from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ABCIndex, ABCSeries from pandas.core import algorithms from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray @@ -130,14 +130,14 @@ def __array_wrap__(self, result, context=None): # ------------------------------------------------------------------------ - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """ Determines if two Index objects contain the same elements. """ if self.is_(other): return True - if not isinstance(other, ABCIndexClass): + if not isinstance(other, Index): return False elif not isinstance(other, type(self)): try: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index ab9d42f444baa..e8d0a44324cc5 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1005,19 +1005,20 @@ def _format_space(self) -> str: def argsort(self, *args, **kwargs) -> np.ndarray: return np.lexsort((self.right, self.left)) - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """ Determines if two IntervalIndex objects contain the same elements. """ if self.is_(other): return True - # if we can coerce to an II - # then we can compare + # if we can coerce to an IntervalIndex then we can compare if not isinstance(other, IntervalIndex): if not is_interval_dtype(other): return False other = Index(other) + if not isinstance(other, IntervalIndex): + return False return ( self.left.equals(other.left) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index be332a7061ae1..ed4da50201c0d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3205,7 +3205,7 @@ def truncate(self, before=None, after=None): verify_integrity=False, ) - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """ Determines if two MultiIndex objects have the same labeling information (the levels themselves do not necessarily have to be the same) @@ -3248,11 +3248,10 @@ def equals(self, other: Any) -> bool: np.asarray(other.levels[i]._values), other_codes, allow_fill=False ) - # since we use NaT both datetime64 and timedelta64 - # we can have a situation where a level is typed say - # timedelta64 in self (IOW it has other values than NaT) - # but types datetime64 in other (where its all NaT) - # but these are equivalent + # since we use NaT both datetime64 and timedelta64 we can have a + # situation where a level is typed say timedelta64 in self (IOW it + # has other values than NaT) but types datetime64 in other (where + # its all NaT) but these are equivalent if len(self_values) == 0 and len(other_values) == 0: continue diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9e0bead39a855..d035cee7c3bc9 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -434,7 +434,7 @@ def argsort(self, *args, **kwargs) -> np.ndarray: else: return np.arange(len(self) - 1, -1, -1) - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: """ Determines if two Index objects contain the same elements. """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a742de36c7bbe..371b721f08b27 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2,17 +2,7 @@ import itertools import operator import re -from typing import ( - Any, - DefaultDict, - Dict, - List, - Optional, - Sequence, - Tuple, - TypeVar, - Union, -) +from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union import warnings import numpy as np @@ -1447,7 +1437,10 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True ) - def equals(self, other: Any) -> bool: + def equals(self, other: object) -> bool: + if not isinstance(other, BlockManager): + return False + self_axes, other_axes = self.axes, other.axes if len(self_axes) != len(other_axes): return False diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 3b1ff233c5ec1..289809fad61fa 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -13,6 +13,7 @@ from pandas import DataFrame, Series import pandas._testing as tm + """ This is a common base class used for various plotting tests """ diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 088f8681feb99..19d5dfa3b3900 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -11,6 +11,7 @@ from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range import pandas._testing as tm + """ Also test support for datetime64[ns] in Series / DataFrame """ From a460526f885da5b4fa13c0ca22aea31c987987d5 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 8 Aug 2020 20:05:19 +0100 Subject: [PATCH 3/5] isort fixup --- pandas/tests/plotting/common.py | 1 - pandas/tests/series/indexing/test_datetime.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 289809fad61fa..3b1ff233c5ec1 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -13,7 +13,6 @@ from pandas import DataFrame, Series import pandas._testing as tm - """ This is a common base class used for various plotting tests """ diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 19d5dfa3b3900..088f8681feb99 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -11,7 +11,6 @@ from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range import pandas._testing as tm - """ Also test support for datetime64[ns] in Series / DataFrame """ From 4aea1c7f59034f7761df7191a6daa9c8a6eb3b9b Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 8 Aug 2020 20:10:32 +0100 Subject: [PATCH 4/5] remove breakpoint --- pandas/core/indexes/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 55fb8456c4bc1..6c3fc9f848d50 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4242,7 +4242,6 @@ def equals(self, other: object) -> bool: and type(other) is not type(self) and other.equals is not self.equals ): - breakpoint() return other.equals(self) return array_equivalent(self._values, other._values) From f0e63277f06ef7d19ec89bc7bb69aaf0382c011e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 10 Aug 2020 15:18:04 +0100 Subject: [PATCH 5/5] use is_dtype_equal --- pandas/core/arrays/base.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c203b2f30e07c..d85647edc3b81 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -20,7 +20,12 @@ from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.cast import maybe_cast_to_extension_array -from pandas.core.dtypes.common import is_array_like, is_list_like, pandas_dtype +from pandas.core.dtypes.common import ( + is_array_like, + is_dtype_equal, + is_list_like, + pandas_dtype, +) from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -763,7 +768,7 @@ def equals(self, other: object) -> bool: if not type(self) == type(other): return False other = cast(ExtensionArray, other) - if not self.dtype == other.dtype: + if not is_dtype_equal(self.dtype, other.dtype): return False elif not len(self) == len(other): return False