From 9c6e6168bdfce5ab04f2f6456441a5a2e4de0abb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 16 Jul 2019 13:27:45 -0700 Subject: [PATCH 1/9] Removed ABCs from pandas._typing --- pandas/_typing.py | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 46b1b4685ec9f..7d309c53868e5 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,33 +1,35 @@ from pathlib import Path -from typing import IO, AnyStr, TypeVar, Union +from typing import IO, TYPE_CHECKING, AnyStr, TypeVar, Union import numpy as np -from pandas._libs import Timestamp -from pandas._libs.tslibs.period import Period -from pandas._libs.tslibs.timedeltas import Timedelta +if TYPE_CHECKING: # Use for any internal imports + from pandas._libs import Timestamp + from pandas._libs.tslibs.period import Period + from pandas._libs.tslibs.timedeltas import Timedelta + + from pandas.core.arrays.base import ExtensionArray + from pandas.core.dtypes.dtypes import ExtensionDtype + from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCExtensionArray, + ABCIndexClass, + ABCSeries, + ABCSparseSeries, + ) + from pandas.core.indexes.base import Index + from pandas.core.frame import DataFrame + from pandas.core.series import Series + from pandas.core.sparse.series import SparseSeries -from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCExtensionArray, - ABCIndexClass, - ABCSeries, - ABCSparseSeries, -) AnyArrayLike = TypeVar( - "AnyArrayLike", - ABCExtensionArray, - ABCIndexClass, - ABCSeries, - ABCSparseSeries, - np.ndarray, + "AnyArrayLike", "ExtensionArray", "Index", "Series", "SparseSeries", np.ndarray ) -ArrayLike = TypeVar("ArrayLike", ABCExtensionArray, np.ndarray) -DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", Period, Timestamp, Timedelta) -Dtype = Union[str, np.dtype, ExtensionDtype] +ArrayLike = TypeVar("ArrayLike", 
"ExtensionArray", np.ndarray) +DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", "Period", "Timestamp", "Timedelta") +Dtype = Union[str, np.dtype, "ExtensionDtype"] FilePathOrBuffer = Union[str, Path, IO[AnyStr]] -FrameOrSeries = TypeVar("FrameOrSeries", ABCSeries, ABCDataFrame) +FrameOrSeries = TypeVar("FrameOrSeries", "Series", "DataFrame") Scalar = Union[str, int, float] From 6490263f3631442b8e3cf523bbf1be575e866f2c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 16 Jul 2019 16:20:04 -0700 Subject: [PATCH 2/9] Fix CI --- pandas/core/dtypes/common.py | 4 ++-- pandas/core/indexes/interval.py | 6 ++++-- setup.cfg | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index d0e4bd9b4482a..5e44af786763d 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -168,11 +168,11 @@ def ensure_int_or_float(arr: ArrayLike, copy=False) -> np.array: will remain unchanged. """ try: - return arr.astype("int64", copy=copy, casting="safe") + return arr.astype("int64", copy=copy, casting="safe") # type: ignore except TypeError: pass try: - return arr.astype("uint64", copy=copy, casting="safe") + return arr.astype("uint64", copy=copy, casting="safe") # type: ignore except TypeError: return arr.astype("float64", copy=copy) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b14cff8cc6ade..2bccd39735f5d 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -934,7 +934,7 @@ def get_indexer( elif not is_object_dtype(target): # homogeneous scalar index: use IntervalTree target = self._maybe_convert_i8(target) - indexer = self._engine.get_indexer(target.values) + indexer = self._engine.get_indexer(target.values) # type: ignore else: # heterogeneous scalar index: defer elementwise to get_loc # (non-overlapping so get_loc guarantees scalar of KeyError) @@ -979,7 +979,9 @@ def get_indexer_non_unique( indexer = 
np.concatenate(indexer) else: target = self._maybe_convert_i8(target) - indexer, missing = self._engine.get_indexer_non_unique(target.values) + indexer, missing = self._engine.get_indexer_non_unique( + target.values + ) # type: ignore return ensure_platform_int(indexer), ensure_platform_int(missing) diff --git a/setup.cfg b/setup.cfg index e559ece2a759a..915acd3339118 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,6 +30,7 @@ exclude = .eggs/*.py, versioneer.py, env # exclude asv benchmark environments from linting + pandas/_typing.py [flake8-rst] bootstrap = From cce9afbd51932f4f9ee19b027de3ad34112b508c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 16 Jul 2019 16:57:02 -0700 Subject: [PATCH 3/9] Fixed type ignore location --- pandas/core/indexes/interval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2bccd39735f5d..e31a5a8f935eb 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -980,8 +980,8 @@ def get_indexer_non_unique( else: target = self._maybe_convert_i8(target) indexer, missing = self._engine.get_indexer_non_unique( - target.values - ) # type: ignore + target.values # type: ignore + ) return ensure_platform_int(indexer), ensure_platform_int(missing) From 5f2fa6aedf78ab1fd3b05f9a119235c4db3ca5c9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 17 Jul 2019 09:10:40 -0700 Subject: [PATCH 4/9] Excluded from coverage and updated tslib imports --- pandas/_typing.py | 4 +--- setup.cfg | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 7d309c53868e5..243b69bc6ea7b 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -4,9 +4,7 @@ import numpy as np if TYPE_CHECKING: # Use for any internal imports - from pandas._libs import Timestamp - from pandas._libs.tslibs.period import Period - from pandas._libs.tslibs.timedeltas import Timedelta + from pandas._libs import 
Period, Timedelta, Timestamp from pandas.core.arrays.base import ExtensionArray from pandas.core.dtypes.dtypes import ExtensionDtype diff --git a/setup.cfg b/setup.cfg index f53e8631593eb..44b11a9db65e0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -78,7 +78,9 @@ filterwarnings = [coverage:run] branch = False -omit = */tests/* +omit = + */tests/* + pandas/_typing.py plugins = Cython.Coverage [coverage:report] From 5a8d35a82753f62f0c33e9ae7f22fc525e627591 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 21 Jul 2019 09:41:03 -0700 Subject: [PATCH 5/9] Comments in typing --- pandas/_typing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 861c27cc24ba6..83ce82e968870 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -3,7 +3,10 @@ import numpy as np -if TYPE_CHECKING: # Use for any internal imports +# To prevent import cycles place any internal imports in the branch below +# and use a string literal forward reference to it in subsequent types +# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles +if TYPE_CHECKING: from pandas._libs import Period, Timedelta, Timestamp from pandas.core.arrays.base import ExtensionArray From 58897155f75a5b93e2918e3f9ba3ed445acae0db Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 21 Jul 2019 10:20:30 -0700 Subject: [PATCH 6/9] Removed shadowing of target variable --- pandas/core/dtypes/common.py | 1 + pandas/core/indexes/interval.py | 48 ++++++++++++++++----------------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4681e289c949e..054c97056a117 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -167,6 +167,7 @@ def ensure_int_or_float(arr: ArrayLike, copy=False) -> np.array: If the array is explicitly of type uint64 the type will remain unchanged. 
""" + # TODO: GH27506 potential bug with ExtensionArrays try: return arr.astype("int64", copy=copy, casting="safe") # type: ignore except TypeError: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 543592b440a2b..e19965c3b10e5 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -906,35 +906,35 @@ def get_indexer( ) raise InvalidIndexError(msg) - target = ensure_index(target) + target_as_index = ensure_index(target) - if isinstance(target, IntervalIndex): + if isinstance(target_as_index, IntervalIndex): # equal indexes -> 1:1 positional match - if self.equals(target): + if self.equals(target_as_index): return np.arange(len(self), dtype="intp") # different closed or incompatible subtype -> no matches common_subtype = find_common_type( - [self.dtype.subtype, target.dtype.subtype] + [self.dtype.subtype, target_as_index.dtype.subtype] ) - if self.closed != target.closed or is_object_dtype(common_subtype): - return np.repeat(np.intp(-1), len(target)) + if self.closed != target_as_index.closed or is_object_dtype(common_subtype): + return np.repeat(np.intp(-1), len(target_as_index)) - # non-overlapping -> at most one match per interval in target + # non-overlapping -> at most one match per interval in target_as_index # want exact matches -> need both left/right to match, so defer to # left/right get_indexer, compare elementwise, equality -> match - left_indexer = self.left.get_indexer(target.left) - right_indexer = self.right.get_indexer(target.right) + left_indexer = self.left.get_indexer(target_as_index.left) + right_indexer = self.right.get_indexer(target_as_index.right) indexer = np.where(left_indexer == right_indexer, left_indexer, -1) - elif not is_object_dtype(target): + elif not is_object_dtype(target_as_index): # homogeneous scalar index: use IntervalTree - target = self._maybe_convert_i8(target) - indexer = self._engine.get_indexer(target.values) # type: ignore + target_as_index = 
self._maybe_convert_i8(target_as_index) + indexer = self._engine.get_indexer(target_as_index.values) else: # heterogeneous scalar index: defer elementwise to get_loc # (non-overlapping so get_loc guarantees scalar of KeyError) indexer = [] - for key in target: + for key in target_as_index: try: loc = self.get_loc(key) except KeyError: @@ -947,21 +947,21 @@ def get_indexer( def get_indexer_non_unique( self, target: AnyArrayLike ) -> Tuple[np.ndarray, np.ndarray]: - target = ensure_index(target) + target_as_index = ensure_index(target) - # check that target IntervalIndex is compatible - if isinstance(target, IntervalIndex): + # check that target_as_index IntervalIndex is compatible + if isinstance(target_as_index, IntervalIndex): common_subtype = find_common_type( - [self.dtype.subtype, target.dtype.subtype] + [self.dtype.subtype, target_as_index.dtype.subtype] ) - if self.closed != target.closed or is_object_dtype(common_subtype): + if self.closed != target_as_index.closed or is_object_dtype(common_subtype): # different closed or incompatible subtype -> no matches - return np.repeat(-1, len(target)), np.arange(len(target)) + return np.repeat(-1, len(target_as_index)), np.arange(len(target_as_index)) - if is_object_dtype(target) or isinstance(target, IntervalIndex): - # target might contain intervals: defer elementwise to get_loc + if is_object_dtype(target_as_index) or isinstance(target_as_index, IntervalIndex): + # target_as_index might contain intervals: defer elementwise to get_loc indexer, missing = [], [] - for i, key in enumerate(target): + for i, key in enumerate(target_as_index): try: locs = self.get_loc(key) if isinstance(locs, slice): @@ -973,9 +973,9 @@ def get_indexer_non_unique( indexer.append(locs) indexer = np.concatenate(indexer) else: - target = self._maybe_convert_i8(target) + target_as_index = self._maybe_convert_i8(target_as_index) indexer, missing = self._engine.get_indexer_non_unique( - target.values # type: ignore + target_as_index.values ) 
return ensure_platform_int(indexer), ensure_platform_int(missing) From ed2ee7fde630f70d69935c2a74f3464d5a3f572e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 21 Jul 2019 10:22:41 -0700 Subject: [PATCH 7/9] Blackify --- pandas/core/indexes/interval.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e19965c3b10e5..66290ae54e626 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -956,9 +956,14 @@ def get_indexer_non_unique( ) if self.closed != target_as_index.closed or is_object_dtype(common_subtype): # different closed or incompatible subtype -> no matches - return np.repeat(-1, len(target_as_index)), np.arange(len(target_as_index)) + return ( + np.repeat(-1, len(target_as_index)), + np.arange(len(target_as_index)), + ) - if is_object_dtype(target_as_index) or isinstance(target_as_index, IntervalIndex): + if is_object_dtype(target_as_index) or isinstance( + target_as_index, IntervalIndex + ): # target_as_index might contain intervals: defer elementwise to get_loc indexer, missing = [], [] for i, key in enumerate(target_as_index): From 095aed491f60c73dd742fdf312a29df50c7a47cc Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 22 Jul 2019 10:09:21 -0700 Subject: [PATCH 8/9] Removed failing annotation --- pandas/core/window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 20d5453cc43a2..184155293e684 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -240,7 +240,7 @@ def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: return values - def _wrap_result(self, result, block=None, obj=None) -> FrameOrSeries: + def _wrap_result(self, result, block=None, obj=None): """ Wrap a single result. 
""" From 4bf254c200c6b702c4ba7eed46deadce3c42721e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 22 Jul 2019 13:23:06 -0700 Subject: [PATCH 9/9] Removed unused imports --- pandas/_typing.py | 22 +++++++--------------- setup.cfg | 1 - 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 83ce82e968870..45c43fa958caa 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -7,21 +7,13 @@ # and use a string literal forward reference to it in subsequent types # https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: - from pandas._libs import Period, Timedelta, Timestamp - - from pandas.core.arrays.base import ExtensionArray - from pandas.core.dtypes.dtypes import ExtensionDtype - from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCExtensionArray, - ABCIndexClass, - ABCSeries, - ABCSparseSeries, - ) - from pandas.core.indexes.base import Index - from pandas.core.frame import DataFrame - from pandas.core.series import Series - from pandas.core.sparse.series import SparseSeries + from pandas._libs import Period, Timedelta, Timestamp # noqa: F401 + from pandas.core.arrays.base import ExtensionArray # noqa: F401 + from pandas.core.dtypes.dtypes import ExtensionDtype # noqa: F401 + from pandas.core.indexes.base import Index # noqa: F401 + from pandas.core.frame import DataFrame # noqa: F401 + from pandas.core.series import Series # noqa: F401 + from pandas.core.sparse.series import SparseSeries # noqa: F401 AnyArrayLike = TypeVar( diff --git a/setup.cfg b/setup.cfg index 44b11a9db65e0..716ff5d9d8853 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,6 @@ exclude = .eggs/*.py, versioneer.py, env # exclude asv benchmark environments from linting - pandas/_typing.py [flake8-rst] bootstrap =