From 632eed537d033ae25f6f5d7d5a1a9951e0f1ac8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 3 Mar 2022 22:59:31 -0500 Subject: [PATCH 01/18] TYP: pd.isna --- pandas/core/arrays/boolean.py | 2 + pandas/core/base.py | 4 +- pandas/core/dtypes/missing.py | 64 ++++++++++++++++++++++++++++++-- pandas/core/window/ewm.py | 4 +- pandas/io/parsers/base_parser.py | 4 +- 5 files changed, 71 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 10469f2aef9ea..345e39da26db0 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -182,6 +182,7 @@ def coerce_to_array( values = values.copy() elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype): mask_values = isna(values) + assert mask_values is not None values_bool = np.zeros(len(values), dtype=bool) values_bool[~mask_values] = values[~mask_values].astype(bool) @@ -201,6 +202,7 @@ def coerce_to_array( raise TypeError("Need to pass bool-like values") mask_values = isna(values_object) + assert mask_values is not None values = np.zeros(len(values), dtype=bool) values[~mask_values] = values_object[~mask_values].astype(bool) diff --git a/pandas/core/base.py b/pandas/core/base.py index 84bc6cb161bec..782d2a0fe05de 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -769,7 +769,9 @@ def hasnans(self) -> bool: Enables various performance speedups. 
""" - return bool(isna(self).any()) + # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" has + # no attribute "any" + return bool(isna(self).any()) # type: ignore[union-attr] def isna(self): return isna(self._values) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 25c814fa1ffb8..4a4ac5291a68d 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -5,6 +5,10 @@ from decimal import Decimal from functools import partial +from typing import ( + TYPE_CHECKING, + overload, +) import numpy as np @@ -19,7 +23,6 @@ from pandas._typing import ( ArrayLike, DtypeObj, - npt, ) from pandas.core.dtypes.common import ( @@ -54,6 +57,19 @@ ) from pandas.core.dtypes.inference import is_list_like +if TYPE_CHECKING: + import numpy.typing as npt + + from pandas._typing import ( + NDFrame, + NDFrameT, + Scalar, + ) + + from pandas.core.arrays.base import ExtensionArray + from pandas.core.indexes.base import Index + + isposinf_scalar = libmissing.isposinf_scalar isneginf_scalar = libmissing.isneginf_scalar @@ -63,7 +79,27 @@ _dtype_str = np.dtype(str) -def isna(obj): +@overload +def isna(obj: Scalar) -> bool: + ... + + +@overload +def isna(obj: Index | ExtensionArray | np.ndarray | list) -> npt.NDArray[np.bool_]: + ... + + +@overload +def isna(obj: NDFrameT) -> NDFrameT: + ... + + +@overload +def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: + ... + + +def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: """ Detect missing values for an array-like object. @@ -284,7 +320,27 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bo return result -def notna(obj): +@overload +def notna(obj: Scalar) -> bool: + ... + + +@overload +def notna(obj: Index | ExtensionArray | np.ndarray | list) -> npt.NDArray[np.bool_]: + ... + + +@overload +def notna(obj: NDFrameT) -> NDFrameT: + ... 
+ + +@overload +def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: + ... + + +def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: """ Detect non-missing values for an array-like object. @@ -362,7 +418,7 @@ def notna(obj): Name: 1, dtype: bool """ res = isna(obj) - if is_scalar(res): + if isinstance(res, bool): return not res return ~res diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 4c2b99762b812..8ce5c2b6f80e2 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -391,7 +391,9 @@ def __init__( raise ValueError( "halflife must be a string or datetime.timedelta object" ) - if isna(self.times).any(): + # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" + # has no attribute "any" + if isna(self.times).any(): # type: ignore[union-attr] raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) # Halflife is no longer applicable when calculating COM diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index e071e281d5a90..72795a6cfc5ae 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -31,6 +31,7 @@ from pandas._typing import ( ArrayLike, DtypeArg, + Scalar, ) from pandas.errors import ( ParserError, @@ -855,8 +856,9 @@ def _check_data_length( data: list of array-likes containing the data column-wise. 
""" if not self.index_col and len(columns) != len(data) and columns: + last_entry = cast(Scalar, data[-1]) if len(columns) == len(data) - 1 and np.all( - (is_object_dtype(data[-1]) and data[-1] == "") | isna(data[-1]) + (is_object_dtype(last_entry) and last_entry == "") | isna(last_entry) ): return warnings.warn( From 4496f4242632dbfe3397745b2f6cf5c60982d9f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 4 Mar 2022 18:04:18 -0500 Subject: [PATCH 02/18] address review --- pandas/core/arrays/boolean.py | 11 +++++++---- pandas/core/base.py | 4 +--- pandas/core/dtypes/missing.py | 13 ++++++++----- pandas/io/parsers/base_parser.py | 1 + 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 345e39da26db0..12c9012478e3b 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,7 +1,10 @@ from __future__ import annotations import numbers -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + cast, +) import numpy as np @@ -31,6 +34,8 @@ if TYPE_CHECKING: import pyarrow + from pandas._typing import npt + @register_extension_dtype class BooleanDtype(BaseMaskedDtype): @@ -182,7 +187,6 @@ def coerce_to_array( values = values.copy() elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype): mask_values = isna(values) - assert mask_values is not None values_bool = np.zeros(len(values), dtype=bool) values_bool[~mask_values] = values[~mask_values].astype(bool) @@ -201,8 +205,7 @@ def coerce_to_array( if inferred_dtype not in ("boolean", "empty") + integer_like: raise TypeError("Need to pass bool-like values") - mask_values = isna(values_object) - assert mask_values is not None + mask_values = cast(npt.NDArray[np.bool_], isna(values_object)) values = np.zeros(len(values), dtype=bool) values[~mask_values] = values_object[~mask_values].astype(bool) diff --git a/pandas/core/base.py b/pandas/core/base.py index 
782d2a0fe05de..84bc6cb161bec 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -769,9 +769,7 @@ def hasnans(self) -> bool: Enables various performance speedups. """ - # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" has - # no attribute "any" - return bool(isna(self).any()) # type: ignore[union-attr] + return bool(isna(self).any()) def isna(self): return isna(self._values) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 4a4ac5291a68d..d3abb3c0186a2 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -58,16 +58,15 @@ from pandas.core.dtypes.inference import is_list_like if TYPE_CHECKING: - import numpy.typing as npt - from pandas._typing import ( NDFrame, NDFrameT, Scalar, + npt, ) from pandas.core.arrays.base import ExtensionArray - from pandas.core.indexes.base import Index + from pandas.core.base import IndexOpsMixin isposinf_scalar = libmissing.isposinf_scalar @@ -85,7 +84,9 @@ def isna(obj: Scalar) -> bool: @overload -def isna(obj: Index | ExtensionArray | np.ndarray | list) -> npt.NDArray[np.bool_]: +def isna( + obj: IndexOpsMixin | ExtensionArray | np.ndarray | list, +) -> npt.NDArray[np.bool_]: ... @@ -326,7 +327,9 @@ def notna(obj: Scalar) -> bool: @overload -def notna(obj: Index | ExtensionArray | np.ndarray | list) -> npt.NDArray[np.bool_]: +def notna( + obj: IndexOpsMixin | ExtensionArray | np.ndarray | list, +) -> npt.NDArray[np.bool_]: ... diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 72795a6cfc5ae..b3a96b2f3aee2 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -856,6 +856,7 @@ def _check_data_length( data: list of array-likes containing the data column-wise. 
""" if not self.index_col and len(columns) != len(data) and columns: + # without this cast mypy thinks that last_entry could still be an ndarray last_entry = cast(Scalar, data[-1]) if len(columns) == len(data) - 1 and np.all( (is_object_dtype(last_entry) and last_entry == "") | isna(last_entry) From b77f7169e08003e90fb591cfc0c049c6a7659599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 4 Mar 2022 18:19:08 -0500 Subject: [PATCH 03/18] avoid needing to import npt at runtime --- pandas/core/arrays/boolean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 12c9012478e3b..9a37c42e63e82 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -205,7 +205,7 @@ def coerce_to_array( if inferred_dtype not in ("boolean", "empty") + integer_like: raise TypeError("Need to pass bool-like values") - mask_values = cast(npt.NDArray[np.bool_], isna(values_object)) + mask_values = cast("npt.NDArray[np.bool_]", isna(values_object)) values = np.zeros(len(values), dtype=bool) values[~mask_values] = values_object[~mask_values].astype(bool) From 24aec6f4a0913219a516300183a9e6ec69c224cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 4 Mar 2022 19:07:09 -0500 Subject: [PATCH 04/18] comment for cast --- pandas/core/arrays/boolean.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 9a37c42e63e82..03eeb1929ae3b 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -205,6 +205,8 @@ def coerce_to_array( if inferred_dtype not in ("boolean", "empty") + integer_like: raise TypeError("Need to pass bool-like values") + # mypy does not to narrow the type of mask_values to npt.NDArray[np.bool_] + # within this branch, it assumes it can also be None mask_values = cast("npt.NDArray[np.bool_]", isna(values_object)) values = 
np.zeros(len(values), dtype=bool) values[~mask_values] = values_object[~mask_values].astype(bool) From c1204b4b9f9003d8f02b67ac3a528a41c46ceb9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 5 Mar 2022 10:05:50 -0500 Subject: [PATCH 05/18] remove to --- pandas/core/arrays/boolean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 03eeb1929ae3b..9d0af65e86eff 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -205,7 +205,7 @@ def coerce_to_array( if inferred_dtype not in ("boolean", "empty") + integer_like: raise TypeError("Need to pass bool-like values") - # mypy does not to narrow the type of mask_values to npt.NDArray[np.bool_] + # mypy does not narrow the type of mask_values to npt.NDArray[np.bool_] # within this branch, it assumes it can also be None mask_values = cast("npt.NDArray[np.bool_]", isna(values_object)) values = np.zeros(len(values), dtype=bool) From b412060d05ad99a0546b0e97d2d54c0070565e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 5 Mar 2022 22:29:56 -0500 Subject: [PATCH 06/18] back to Index --- pandas/core/base.py | 4 +++- pandas/core/dtypes/missing.py | 6 +++--- pandas/io/parsers/base_parser.py | 7 ++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 84bc6cb161bec..6ddbdb0fa3d07 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -769,7 +769,9 @@ def hasnans(self) -> bool: Enables various performance speedups. 
""" - return bool(isna(self).any()) + # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" + # has no attribute "any" + return bool(isna(self).any()) # type: ignore[union-attr] def isna(self): return isna(self._values) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index d3abb3c0186a2..86ef9265afd2c 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -66,7 +66,7 @@ ) from pandas.core.arrays.base import ExtensionArray - from pandas.core.base import IndexOpsMixin + from pandas.core.indexes.base import Index isposinf_scalar = libmissing.isposinf_scalar @@ -85,7 +85,7 @@ def isna(obj: Scalar) -> bool: @overload def isna( - obj: IndexOpsMixin | ExtensionArray | np.ndarray | list, + obj: Index | ExtensionArray | np.ndarray | list, ) -> npt.NDArray[np.bool_]: ... @@ -328,7 +328,7 @@ def notna(obj: Scalar) -> bool: @overload def notna( - obj: IndexOpsMixin | ExtensionArray | np.ndarray | list, + obj: Index | ExtensionArray | np.ndarray | list, ) -> npt.NDArray[np.bool_]: ... diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index b3a96b2f3aee2..f7d3b40b79fe0 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -856,10 +856,11 @@ def _check_data_length( data: list of array-likes containing the data column-wise. 
""" if not self.index_col and len(columns) != len(data) and columns: - # without this cast mypy thinks that last_entry could still be an ndarray - last_entry = cast(Scalar, data[-1]) if len(columns) == len(data) - 1 and np.all( - (is_object_dtype(last_entry) and last_entry == "") | isna(last_entry) + # error: No overload variant of "__ror__" of "ndarray" matches + # argument type "ExtensionArray" + (is_object_dtype(data[-1]) and data[-1] == "") + | isna(data[-1]) # type: ignore[operator] ): return warnings.warn( From 17c9779811f637ba95a991e0e6b538bda48d10c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 5 Mar 2022 23:51:33 -0500 Subject: [PATCH 07/18] remove unused Scalar --- pandas/io/parsers/base_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index f7d3b40b79fe0..f9498c9b27592 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -31,7 +31,6 @@ from pandas._typing import ( ArrayLike, DtypeArg, - Scalar, ) from pandas.errors import ( ParserError, From 2d7f86a507a0dcc496e74b38ca3013e8e8d28d7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 6 Mar 2022 12:03:02 -0500 Subject: [PATCH 08/18] get left | right on the same line to avoid mypy printing a 'note' --- pandas/io/parsers/base_parser.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index f9498c9b27592..0542ea839cd42 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -855,12 +855,11 @@ def _check_data_length( data: list of array-likes containing the data column-wise. 
""" if not self.index_col and len(columns) != len(data) and columns: - if len(columns) == len(data) - 1 and np.all( - # error: No overload variant of "__ror__" of "ndarray" matches - # argument type "ExtensionArray" - (is_object_dtype(data[-1]) and data[-1] == "") - | isna(data[-1]) # type: ignore[operator] - ): + empty_str = is_object_dtype(data[-1]) and data[-1] == "" + # error: No overload variant of "__ror__" of "ndarray" matches + # argument type "ExtensionArray" + empty_str_or_na = empty_str | isna(data[-1]) # type: ignore[operator] + if len(columns) == len(data) - 1 and np.all(empty_str_or_na): return warnings.warn( "Length of header or names does not match length of data. This leads " From eb16bc363a6d723364f5eb53a6766bf904e25152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 7 Mar 2022 09:33:24 -0500 Subject: [PATCH 09/18] ArrayLike --- pandas/core/dtypes/missing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 86ef9265afd2c..1606bbe0eb3bb 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -59,13 +59,13 @@ if TYPE_CHECKING: from pandas._typing import ( + ArrayLike, NDFrame, NDFrameT, Scalar, npt, ) - from pandas.core.arrays.base import ExtensionArray from pandas.core.indexes.base import Index @@ -85,7 +85,7 @@ def isna(obj: Scalar) -> bool: @overload def isna( - obj: Index | ExtensionArray | np.ndarray | list, + obj: ArrayLike | Index | list, ) -> npt.NDArray[np.bool_]: ... @@ -328,7 +328,7 @@ def notna(obj: Scalar) -> bool: @overload def notna( - obj: Index | ExtensionArray | np.ndarray | list, + obj: ArrayLike | Index | list, ) -> npt.NDArray[np.bool_]: ... 
From b9288cb86f329aa42b594d07097023befbc81bfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 7 Mar 2022 09:33:57 -0500 Subject: [PATCH 10/18] unsupported overloads --- pandas/core/base.py | 4 +--- pandas/core/dtypes/missing.py | 33 ++++++++++++++++++++------------- pandas/core/window/ewm.py | 5 ++--- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index e03b05f539e8f..74e119a42c974 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -769,9 +769,7 @@ def hasnans(self) -> bool: Enables various performance speedups. """ - # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" - # has no attribute "any" - return bool(isna(self).any()) # type: ignore[union-attr] + return bool(isna(self).any()) def isna(self): return isna(self._values) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 1606bbe0eb3bb..64d1b602d96a4 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -7,6 +7,7 @@ from functools import partial from typing import ( TYPE_CHECKING, + Literal, overload, ) @@ -66,7 +67,7 @@ npt, ) - from pandas.core.indexes.base import Index + from pandas.core.base import IndexOpsMixin isposinf_scalar = libmissing.isposinf_scalar @@ -79,24 +80,27 @@ @overload -def isna(obj: Scalar) -> bool: +def isna(obj: Scalar) -> bool: # type: ignore[misc] ... @overload -def isna( - obj: ArrayLike | Index | list, -) -> npt.NDArray[np.bool_]: +def isna(obj: ArrayLike | list) -> npt.NDArray[np.bool_]: # type: ignore[misc] ... @overload -def isna(obj: NDFrameT) -> NDFrameT: +def isna(obj: NDFrameT) -> NDFrameT: # type: ignore[misc] ... @overload -def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: +def isna(obj: IndexOpsMixin) -> npt.NDArray[np.bool_]: # type: ignore[misc] + ... + + +@overload +def isna(obj: object) -> Literal[False]: ... 
@@ -322,24 +326,27 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bo @overload -def notna(obj: Scalar) -> bool: +def notna(obj: Scalar) -> bool: # type: ignore[misc] ... @overload -def notna( - obj: ArrayLike | Index | list, -) -> npt.NDArray[np.bool_]: +def notna(obj: ArrayLike | list) -> npt.NDArray[np.bool_]: # type: ignore[misc] ... @overload -def notna(obj: NDFrameT) -> NDFrameT: +def notna(obj: NDFrameT) -> NDFrameT: # type: ignore[misc] ... @overload -def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: +def notna(obj: IndexOpsMixin) -> npt.NDArray[np.bool_]: # type: ignore[misc] + ... + + +@overload +def notna(obj: object) -> Literal[False]: ... diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 8ce5c2b6f80e2..db4ac03d9838a 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -391,9 +391,8 @@ def __init__( raise ValueError( "halflife must be a string or datetime.timedelta object" ) - # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" - # has no attribute "any" - if isna(self.times).any(): # type: ignore[union-attr] + # error: "Literal[False]" has no attribute "any" + if isna(self.times).any(): # type: ignore[attr-defined] raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) # Halflife is no longer applicable when calculating COM From c77248f2349b9e52b3ab2e9c29bc18d1b37fed07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 7 Mar 2022 10:15:50 -0500 Subject: [PATCH 11/18] all typing imports within TYPE_CHECKING --- pandas/core/dtypes/missing.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 64d1b602d96a4..9b5102c489d86 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -21,10 +21,6 @@ NaT, iNaT, ) -from pandas._typing import ( -
ArrayLike, - DtypeObj, -) from pandas.core.dtypes.common import ( DT64NS_DTYPE, @@ -61,6 +57,7 @@ if TYPE_CHECKING: from pandas._typing import ( ArrayLike, + DtypeObj, NDFrame, NDFrameT, Scalar, From 70d5d608267541581a9f88faf734cc2c1188ed67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 8 Mar 2022 09:55:05 -0500 Subject: [PATCH 12/18] Revert "unsupported overloads" This reverts commit b9288cb86f329aa42b594d07097023befbc81bfa. --- pandas/core/base.py | 4 +++- pandas/core/dtypes/missing.py | 33 +++++++++++++-------------------- pandas/core/window/ewm.py | 5 +++-- 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 74e119a42c974..e03b05f539e8f 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -769,7 +769,9 @@ def hasnans(self) -> bool: Enables various performance speedups. """ - return bool(isna(self).any()) + # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" + # has no attribute "any" + return bool(isna(self).any()) # type: ignore[union-attr] def isna(self): return isna(self._values) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 9b5102c489d86..b6bda0898f161 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -7,7 +7,6 @@ from functools import partial from typing import ( TYPE_CHECKING, - Literal, overload, ) @@ -64,7 +63,7 @@ npt, ) - from pandas.core.base import IndexOpsMixin + from pandas.core.indexes.base import Index isposinf_scalar = libmissing.isposinf_scalar @@ -77,27 +76,24 @@ @overload -def isna(obj: Scalar) -> bool: # type: ignore[misc] +def isna(obj: Scalar) -> bool: ... @overload -def isna(obj: ArrayLike | list) -> npt.NDArray[np.bool_]: # type: ignore[misc] +def isna( + obj: ArrayLike | Index | list, +) -> npt.NDArray[np.bool_]: ... @overload -def isna(obj: NDFrameT) -> NDFrameT: # type: ignore[misc] +def isna(obj: NDFrameT) -> NDFrameT: ... 
@overload -def isna(obj: IndexOpsMixin) -> npt.NDArray[np.bool_]: # type: ignore[misc] - ... - - -@overload -def isna(obj: object) -> Literal[False]: +def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ... @@ -323,27 +319,24 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bo @overload -def notna(obj: Scalar) -> bool: # type: ignore[misc] +def notna(obj: Scalar) -> bool: ... @overload -def notna(obj: ArrayLike | list) -> npt.NDArray[np.bool_]: # type: ignore[misc] +def notna( + obj: ArrayLike | Index | list, +) -> npt.NDArray[np.bool_]: ... @overload -def notna(obj: NDFrameT) -> NDFrameT: # type: ignore[misc] +def notna(obj: NDFrameT) -> NDFrameT: ... @overload -def notna(obj: IndexOpsMixin) -> npt.NDArray[np.bool_]: # type: ignore[misc] - ... - - -@overload -def notna(obj: object) -> Literal[False]: +def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ... diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index db4ac03d9838a..8ce5c2b6f80e2 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -391,8 +391,9 @@ def __init__( raise ValueError( "halflife must be a string or datetime.timedelta object" ) - # error: "Literal[False]" has no attribute "any" - if isna(self.times).any(): # type: ignore[attr-defined] + # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" + # has no attribute "any" + if isna(self.times).any(): # type: ignore[union-attr] raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) # Halflife is no longer applicable when calculating COM From 5f8b8560ca0d72d44d7974a184d43267b00f9c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 8 Mar 2022 10:50:56 -0500 Subject: [PATCH 13/18] handle unions --- pandas/core/dtypes/missing.py | 12 ++++++++++++ pandas/core/window/ewm.py | 16 ++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff 
--git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index b6bda0898f161..4316109da1cbb 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -92,6 +92,12 @@ def isna(obj: NDFrameT) -> NDFrameT: ... +# handle unions +@overload +def isna(obj: NDFrameT | ArrayLike | Index | list) -> NDFrameT | npt.NDArray[np.bool_]: + ... + + @overload def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ... @@ -335,6 +341,12 @@ def notna(obj: NDFrameT) -> NDFrameT: ... +# handle unions +@overload +def notna(obj: NDFrameT | ArrayLike | Index | list) -> NDFrameT | npt.NDArray[np.bool_]: + ... + + @overload def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ... diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 8ce5c2b6f80e2..90638d89b4201 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -3,7 +3,10 @@ import datetime from functools import partial from textwrap import dedent -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + cast, +) import warnings import numpy as np @@ -380,20 +383,17 @@ def __init__( FutureWarning, stacklevel=find_stack_level(), ) - self.times = self._selected_obj[self.times] + # self.times cannot be str anymore + self.times = cast(Series, self._selected_obj[self.times]) if not is_datetime64_ns_dtype(self.times): raise ValueError("times must be datetime64[ns] dtype.") - # error: Argument 1 to "len" has incompatible type "Union[str, ndarray, - # NDFrameT, None]"; expected "Sized" - if len(self.times) != len(obj): # type: ignore[arg-type] + if len(self.times) != len(obj): raise ValueError("times must be the same length as the object.") if not isinstance(self.halflife, (str, datetime.timedelta)): raise ValueError( "halflife must be a string or datetime.timedelta object" ) - # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" - # has no attribute "any" - if isna(self.times).any(): # type: ignore[union-attr] 
+ if isna(self.times).any(): raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) # Halflife is no longer applicable when calculating COM From 7e65d8973aea6fc57df58e696815b415ab9685f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 8 Mar 2022 12:30:38 -0500 Subject: [PATCH 14/18] do not require Series to be imported at runtime --- pandas/core/window/ewm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 90638d89b4201..ef9a0b85ee6b3 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -384,7 +384,7 @@ def __init__( stacklevel=find_stack_level(), ) # self.times cannot be str anymore - self.times = cast(Series, self._selected_obj[self.times]) + self.times = cast("Series", self._selected_obj[self.times]) if not is_datetime64_ns_dtype(self.times): raise ValueError("times must be datetime64[ns] dtype.") if len(self.times) != len(obj): From a0cf8607931ac6d6a4d42cc5dd5a6f5910a9e5b1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 8 Mar 2022 18:17:40 -0800 Subject: [PATCH 15/18] CI/TST: numpy 1.22.3 release fixes (#46274) --- pandas/core/indexes/multi.py | 4 +--- pandas/tests/frame/test_ufunc.py | 6 ------ 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f40857059a794..cdde510927081 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1088,9 +1088,7 @@ def _engine(self): # equivalent to sorting lexicographically the codes themselves. 
Notice # that each level needs to be shifted by the number of bits needed to # represent the _previous_ ones: - offsets = np.concatenate([lev_bits[1:], [0]]).astype( # type: ignore[arg-type] - "uint64" - ) + offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64") # Check the total number of bits needed for our representation: if lev_bits[0] > 64: diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py index 2a4b212d0acd7..b884224315a61 100644 --- a/pandas/tests/frame/test_ufunc.py +++ b/pandas/tests/frame/test_ufunc.py @@ -263,12 +263,6 @@ def test_alignment_deprecation_many_inputs(request): ) if np_version_gte1p22: - mark = pytest.mark.xfail( - reason="ufunc 'my_ufunc' did not contain a loop with signature matching " - "types", - ) - request.node.add_marker(mark) - mark = pytest.mark.filterwarnings( "ignore:`np.MachAr` is deprecated.*:DeprecationWarning" ) From a29a9e0b5fd36cda0a087acae35b64048bddd5cc Mon Sep 17 00:00:00 2001 From: Ekaterina <32567581+EkaterinaKuzkina@users.noreply.github.com> Date: Wed, 9 Mar 2022 14:19:34 +0000 Subject: [PATCH 16/18] TYP: annotation of __init__ return type (PEP 484) (misc modules) (#46280) --- doc/make.py | 2 +- pandas/_config/config.py | 6 +++--- pandas/_testing/contexts.py | 2 +- pandas/compat/numpy/function.py | 2 +- pandas/conftest.py | 6 +++--- pandas/errors/__init__.py | 2 +- pandas/tseries/frequencies.py | 2 +- pandas/tseries/holiday.py | 4 ++-- scripts/validate_docstrings.py | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/make.py b/doc/make.py index 5d2476fcdca8d..c758c7fc84bbb 100755 --- a/doc/make.py +++ b/doc/make.py @@ -45,7 +45,7 @@ def __init__( single_doc=None, verbosity=0, warnings_are_errors=False, - ): + ) -> None: self.num_jobs = num_jobs self.include_api = include_api self.whatsnew = whatsnew diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 8815bb46318c4..58c9eae5fe7f3 100644 --- a/pandas/_config/config.py +++ 
b/pandas/_config/config.py @@ -204,7 +204,7 @@ def get_default_val(pat: str): class DictWrapper: """provide attribute-style access to a nested dict""" - def __init__(self, d: dict[str, Any], prefix: str = ""): + def __init__(self, d: dict[str, Any], prefix: str = "") -> None: object.__setattr__(self, "d", d) object.__setattr__(self, "prefix", prefix) @@ -248,7 +248,7 @@ def __dir__(self) -> Iterable[str]: class CallableDynamicDoc: - def __init__(self, func, doc_tmpl): + def __init__(self, func, doc_tmpl) -> None: self.__doc_tmpl__ = doc_tmpl self.__func__ = func @@ -422,7 +422,7 @@ class option_context(ContextDecorator): ... pass """ - def __init__(self, *args): + def __init__(self, *args) -> None: if len(args) % 2 != 0 or len(args) < 2: raise ValueError( "Need to invoke as option_context(pat, val, [(pat, val), ...])." diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index 547ec9db20994..7df9afd68b432 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -228,7 +228,7 @@ class RNGContext: np.random.randn() """ - def __init__(self, seed): + def __init__(self, seed) -> None: self.seed = seed def __enter__(self): diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index cea1b80d340c8..e3aa5bb52f2ba 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -40,7 +40,7 @@ def __init__( fname=None, method: str | None = None, max_fname_arg_count=None, - ): + ) -> None: self.fname = fname self.method = method self.defaults = defaults diff --git a/pandas/conftest.py b/pandas/conftest.py index 8d5913ce0a9ae..a01dcd3269eb6 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -447,7 +447,7 @@ def dict_subclass(): """ class TestSubDict(dict): - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: dict.__init__(self, *args, **kwargs) return TestSubDict @@ -460,7 +460,7 @@ def non_dict_mapping_subclass(): """ class 
TestNonDictMapping(abc.Mapping): - def __init__(self, underlying_dict): + def __init__(self, underlying_dict) -> None: self._data = underlying_dict def __getitem__(self, key): @@ -1709,7 +1709,7 @@ class TestMemoryFS(MemoryFileSystem): protocol = "testmem" test = [None] - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.test[0] = kwargs.pop("test", None) super().__init__(**kwargs) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index cbe94673a8122..44f999cb1296a 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -186,7 +186,7 @@ class AbstractMethodError(NotImplementedError): while keeping compatibility with Python 2 and Python 3. """ - def __init__(self, class_instance, methodtype="method"): + def __init__(self, class_instance, methodtype="method") -> None: types = {"method", "classmethod", "staticmethod", "property"} if methodtype not in types: raise ValueError( diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 1088b3b1a79ea..9ce70ec38870c 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -209,7 +209,7 @@ class _FrequencyInferer: Not sure if I can avoid the state machine here """ - def __init__(self, index, warn: bool = True): + def __init__(self, index, warn: bool = True) -> None: self.index = index self.i8values = index.asi8 diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 365406617ab46..6fd49e2340e30 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -160,7 +160,7 @@ def __init__( start_date=None, end_date=None, days_of_week=None, - ): + ) -> None: """ Parameters ---------- @@ -393,7 +393,7 @@ class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass): end_date = Timestamp(datetime(2200, 12, 31)) _cache = None - def __init__(self, name=None, rules=None): + def __init__(self, name=None, rules=None) -> None: """ Initializes holiday object with a given set a rules. 
Normally classes just have the rules defined within them. diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index df1971b998bab..3a0c437c918fb 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -136,7 +136,7 @@ def get_api_items(api_doc_fd): class PandasDocstring(Validator): - def __init__(self, func_name: str, doc_obj=None): + def __init__(self, func_name: str, doc_obj=None) -> None: self.func_name = func_name if doc_obj is None: doc_obj = get_doc_object(Validator._load_obj(func_name)) From d1957f498653b2c811c55fe2d955310971ef957a Mon Sep 17 00:00:00 2001 From: Ekaterina <32567581+EkaterinaKuzkina@users.noreply.github.com> Date: Wed, 9 Mar 2022 14:20:56 +0000 Subject: [PATCH 17/18] TYP: annotation of __init__ return type (PEP 484) (pandas/tests) (#46278) --- pandas/tests/base/test_constructors.py | 2 +- pandas/tests/dtypes/test_inference.py | 6 +++--- pandas/tests/extension/arrow/arrays.py | 4 ++-- pandas/tests/extension/arrow/test_timestamp.py | 2 +- pandas/tests/extension/decimal/array.py | 4 ++-- pandas/tests/extension/json/array.py | 2 +- pandas/tests/extension/list/array.py | 2 +- pandas/tests/extension/test_common.py | 2 +- pandas/tests/extension/test_extension.py | 2 +- pandas/tests/frame/constructors/test_from_records.py | 2 +- pandas/tests/frame/methods/test_select_dtypes.py | 4 ++-- pandas/tests/frame/methods/test_set_index.py | 4 ++-- pandas/tests/frame/methods/test_to_records.py | 2 +- pandas/tests/frame/test_arithmetic.py | 2 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/frame/test_stack_unstack.py | 2 +- pandas/tests/frame/test_subclass.py | 2 +- pandas/tests/groupby/test_counting.py | 2 +- pandas/tests/indexes/datetimes/test_timezones.py | 2 +- pandas/tests/indexes/test_index_new.py | 2 +- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/io/formats/test_console.py | 2 +- pandas/tests/io/json/test_pandas.py | 4 ++-- pandas/tests/io/json/test_readlines.py | 2 +- 
pandas/tests/io/json/test_ujson.py | 4 ++-- pandas/tests/io/parser/common/test_common_basic.py | 2 +- pandas/tests/io/parser/test_python_parser_only.py | 2 +- pandas/tests/io/parser/test_unsupported.py | 2 +- pandas/tests/io/test_common.py | 2 +- pandas/tests/io/test_html.py | 2 +- pandas/tests/io/test_pickle.py | 6 +++--- pandas/tests/io/test_sql.py | 2 +- pandas/tests/reshape/test_pivot.py | 2 +- pandas/tests/scalar/timedelta/test_arithmetic.py | 2 +- pandas/tests/series/methods/test_is_unique.py | 2 +- pandas/tests/series/test_ufunc.py | 4 ++-- pandas/tests/test_register_accessor.py | 6 +++--- pandas/tests/tseries/holiday/test_calendar.py | 2 +- 38 files changed, 51 insertions(+), 51 deletions(-) diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py index c845be1d0d1f5..44d6bc57b0431 100644 --- a/pandas/tests/base/test_constructors.py +++ b/pandas/tests/base/test_constructors.py @@ -51,7 +51,7 @@ def bar(self, *args, **kwargs): pass class Delegate(PandasDelegate, PandasObject): - def __init__(self, obj): + def __init__(self, obj) -> None: self.obj = obj def test_invalid_delegation(self): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 99037226782db..bf13e6b7b4629 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -91,7 +91,7 @@ class MockNumpyLikeArray: a scalar (`is_scalar(np.array(1)) == False`), but it is not list-like either. """ - def __init__(self, values): + def __init__(self, values) -> None: self._values = values def __iter__(self): @@ -323,7 +323,7 @@ def test_is_dict_like_fails(ll): @pytest.mark.parametrize("has_contains", [True, False]) def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains): class DictLike: - def __init__(self, d): + def __init__(self, d) -> None: self.d = d if has_keys: @@ -1937,7 +1937,7 @@ def test_is_scalar_number(self): # subclasses are. 
class Numeric(Number): - def __init__(self, value): + def __init__(self, value) -> None: self.value = value def __int__(self): diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 1ab3d49392052..33eef35153bce 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -179,7 +179,7 @@ def all(self, axis=0, out=None): class ArrowBoolArray(ArrowExtensionArray): - def __init__(self, values): + def __init__(self, values) -> None: if not isinstance(values, pa.ChunkedArray): raise ValueError @@ -189,7 +189,7 @@ def __init__(self, values): class ArrowStringArray(ArrowExtensionArray): - def __init__(self, values): + def __init__(self, values) -> None: if not isinstance(values, pa.ChunkedArray): raise ValueError diff --git a/pandas/tests/extension/arrow/test_timestamp.py b/pandas/tests/extension/arrow/test_timestamp.py index 28e6ce0e77b34..b2750784ab3d6 100644 --- a/pandas/tests/extension/arrow/test_timestamp.py +++ b/pandas/tests/extension/arrow/test_timestamp.py @@ -40,7 +40,7 @@ def construct_array_type(cls) -> type_t[ArrowTimestampUSArray]: class ArrowTimestampUSArray(ArrowExtensionArray): - def __init__(self, values): + def __init__(self, values) -> None: if not isinstance(values, pa.ChunkedArray): raise ValueError diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index fe489c7605d0a..a3edc95fce96b 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -41,7 +41,7 @@ class DecimalDtype(ExtensionDtype): na_value = decimal.Decimal("NaN") _metadata = ("context",) - def __init__(self, context=None): + def __init__(self, context=None) -> None: self.context = context or decimal.getcontext() def __repr__(self) -> str: @@ -66,7 +66,7 @@ def _is_numeric(self) -> bool: class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray): __array_priority__ = 1000 - def __init__(self, values, dtype=None, 
copy=False, context=None): + def __init__(self, values, dtype=None, copy=False, context=None) -> None: for i, val in enumerate(values): if is_float(val): if np.isnan(val): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 2ce242baf2e5e..125a0aee2089d 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -67,7 +67,7 @@ class JSONArray(ExtensionArray): dtype = JSONDtype() __array_priority__ = 1000 - def __init__(self, values, dtype=None, copy=False): + def __init__(self, values, dtype=None, copy=False) -> None: for val in values: if not isinstance(val, self.dtype.type): raise TypeError("All values must be of type " + str(self.dtype.type)) diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index 47015ed334ddf..f281a0f82e0e7 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -44,7 +44,7 @@ class ListArray(ExtensionArray): dtype = ListDtype() __array_priority__ = 1000 - def __init__(self, values, dtype=None, copy=False): + def __init__(self, values, dtype=None, copy=False) -> None: if not isinstance(values, np.ndarray): raise TypeError("Need to pass a numpy array as values") for val in values: diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index e43650c291200..62bc250193564 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -14,7 +14,7 @@ class DummyDtype(dtypes.ExtensionDtype): class DummyArray(ExtensionArray): - def __init__(self, data): + def __init__(self, data) -> None: self.data = data def __array__(self, dtype): diff --git a/pandas/tests/extension/test_extension.py b/pandas/tests/extension/test_extension.py index 939b836a11556..1ed626cd51080 100644 --- a/pandas/tests/extension/test_extension.py +++ b/pandas/tests/extension/test_extension.py @@ -8,7 +8,7 @@ class MyEA(ExtensionArray): - def __init__(self, 
values): + def __init__(self, values) -> None: self._values = values diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 4aa150afadef6..c6d54e28ca1c8 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -194,7 +194,7 @@ def test_from_records_bad_index_column(self): def test_from_records_non_tuple(self): class Record: - def __init__(self, *args): + def __init__(self, *args) -> None: self.args = args def __getitem__(self, i): diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 4cfd9975652e3..9da6b61e67603 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -15,7 +15,7 @@ class DummyDtype(ExtensionDtype): type = int - def __init__(self, numeric): + def __init__(self, numeric) -> None: self._numeric = numeric @property @@ -28,7 +28,7 @@ def _is_numeric(self): class DummyArray(ExtensionArray): - def __init__(self, data, dtype): + def __init__(self, data, dtype) -> None: self.data = data self._dtype = dtype diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 1b3db10ec6158..4c39cf99f18ff 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -595,7 +595,7 @@ def test_set_index_custom_label_type(self): # GH#24969 class Thing: - def __init__(self, name, color): + def __init__(self, name, color) -> None: self.name = name self.color = color @@ -673,7 +673,7 @@ def test_set_index_custom_label_type_raises(self): # purposefully inherit from something unhashable class Thing(set): - def __init__(self, name, color): + def __init__(self, name, color) -> None: self.name = name self.color = color diff --git a/pandas/tests/frame/methods/test_to_records.py 
b/pandas/tests/frame/methods/test_to_records.py index a2e94782142ac..1a84fb73fd524 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -358,7 +358,7 @@ def test_to_records_dtype_mi(self, df, kwargs, expected): def test_to_records_dict_like(self): # see GH#18146 class DictLike: - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.d = kwargs.copy() def __getitem__(self, key): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index da5e7d15ab9d2..7c33242192d2e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -41,7 +41,7 @@ def switch_numexpr_min_elements(request): class DummyElement: - def __init__(self, value, dtype): + def __init__(self, value, dtype) -> None: self.value = value self.dtype = np.dtype(dtype) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index e5b1673da1e27..82c7117cc00c6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1363,7 +1363,7 @@ def test_constructor_sequence_like(self): # collections.Sequence like class DummyContainer(abc.Sequence): - def __init__(self, lst): + def __init__(self, lst) -> None: self._lst = lst def __getitem__(self, n): diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 19326c185075f..ba89a76a7f8c2 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1861,7 +1861,7 @@ def test_unstack_number_of_levels_larger_than_int32(self, monkeypatch): # GH 26314: Change ValueError to PerformanceWarning class MockUnstacker(reshape_lib._Unstacker): - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: # __init__ will raise the warning super().__init__(*args, **kwargs) raise Exception("Don't compute final result.") diff --git 
a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 9ec4179bf83fd..d5331b1060b23 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -42,7 +42,7 @@ class CustomDataFrame(DataFrame): custom plotting functions. """ - def __init__(self, *args, **kw): + def __init__(self, *args, **kw) -> None: super().__init__(*args, **kw) @property diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 73b2d8ac2c1f5..f0a3219d0b419 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -364,7 +364,7 @@ class RaisingObjectException(Exception): pass class RaisingObject: - def __init__(self, msg="I will raise inside Cython"): + def __init__(self, msg="I will raise inside Cython") -> None: super().__init__() self.msg = msg diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 9a1cb6a303c72..51bc054010aca 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -40,7 +40,7 @@ class FixedOffset(tzinfo): """Fixed offset in minutes east from UTC.""" - def __init__(self, offset, name): + def __init__(self, offset, name) -> None: self.__offset = timedelta(minutes=offset) self.__name = name diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 3052c9d7ee69b..9a57e3e08a59c 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -352,7 +352,7 @@ def test_constructor_ndarray_like(self, array): # it should be possible to convert any object that satisfies the numpy # ndarray interface directly into an Index class ArrayLike: - def __init__(self, array): + def __init__(self, array) -> None: self.array = array def __array__(self, dtype=None) -> np.ndarray: diff --git a/pandas/tests/indexing/test_iloc.py 
b/pandas/tests/indexing/test_iloc.py index a965d32c82c61..426192ab46914 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1072,7 +1072,7 @@ def test_iloc_getitem_float_duplicates(self): def test_iloc_setitem_custom_object(self): # iloc with an object class TO: - def __init__(self, value): + def __init__(self, value) -> None: self.value = value def __str__(self) -> str: diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index 5bd73e6045e32..dd7b57df9baed 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -12,7 +12,7 @@ class MockEncoding: side effect should be an exception that will be raised. """ - def __init__(self, encoding): + def __init__(self, encoding) -> None: super().__init__() self.val = encoding diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index e08b7592c4d82..985d9e47ea7bd 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -585,7 +585,7 @@ def test_frame_nonprintable_bytes(self): # GH14256: failing column caused segfaults, if it is not the last one class BinaryThing: - def __init__(self, hexed): + def __init__(self, hexed) -> None: self.hexed = hexed self.binary = bytes.fromhex(hexed) @@ -1817,7 +1817,7 @@ def test_to_json_multiindex_escape(self): def test_to_json_series_of_objects(self): class _TestObject: - def __init__(self, a, b, _c, d): + def __init__(self, a, b, _c, d) -> None: self.a = a self.b = b self._c = _c diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 4ba9f48a40fbc..6fa0c4d5c51a1 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -281,7 +281,7 @@ def test_chunksize_is_incremental(): ) class MyReader: - def __init__(self, contents): + def __init__(self, contents) -> None: self.read_count = 0 self.stringio = 
StringIO(contents) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index d3cf144451c95..93318bed2a6af 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -660,7 +660,7 @@ def toDict(self): def test_default_handler(self): class _TestObject: - def __init__(self, val): + def __init__(self, val) -> None: self.val = val @property @@ -714,7 +714,7 @@ def my_obj_handler(_): def test_encode_object(self): class _TestObject: - def __init__(self, a, b, _c, d): + def __init__(self, a, b, _c, d) -> None: self.a = a self.b = b self._c = _c diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index bde69e365cfd1..5472bd99fa746 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -41,7 +41,7 @@ def test_override_set_noconvert_columns(): # Usecols needs to be sorted in _set_noconvert_columns based # on the test_usecols_with_parse_dates test from test_usecols.py class MyTextFileReader(TextFileReader): - def __init__(self): + def __init__(self) -> None: self._currow = 0 self.squeeze = False diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index 999a6217efb68..f72d22c9e5b0c 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -319,7 +319,7 @@ def test_python_engine_file_no_next(python_parser_only): parser = python_parser_only class NoNextBuffer: - def __init__(self, csv_data): + def __init__(self, csv_data) -> None: self.data = csv_data def __iter__(self): diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index b28417c9a3625..7937f47e8bff5 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -114,7 +114,7 @@ def test_python_engine(self, 
python_engine): def test_python_engine_file_no_iter(self, python_engine): # see gh-16530 class NoNextBuffer: - def __init__(self, csv_data): + def __init__(self, csv_data) -> None: self.data = csv_data def __next__(self): diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index adf4f32837acf..ca6809470b2b1 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -28,7 +28,7 @@ class CustomFSPath: """For testing fspath on unknown objects""" - def __init__(self, path): + def __init__(self, path) -> None: self.path = path def __fspath__(self): diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 59a968bc4719a..99fa31726445a 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1243,7 +1243,7 @@ def test_parse_failure_rewinds(self): # Issue #17975 class MockFile: - def __init__(self, data): + def __init__(self, data) -> None: self.data = data self.at_end = False diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 3ad8fd4051b48..8f19a54a5eedf 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -436,12 +436,12 @@ def python_pickler(obj, path): pickle.dump(obj, fh, protocol=-1) class MockReadResponse: - def __init__(self, path): + def __init__(self, path) -> None: self.file = open(path, "rb") if "gzip" in path: self.headers = {"Content-Encoding": "gzip"} else: - self.headers = {"Content-Encoding": None} + self.headers = {"Content-Encoding": ""} def __enter__(self): return self @@ -478,7 +478,7 @@ def test_pickle_fsspec_roundtrip(): class MyTz(datetime.tzinfo): - def __init__(self): + def __init__(self) -> None: pass diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 659fe7c30e552..31add4743d1e9 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1538,7 +1538,7 @@ def test_con_unknown_dbapi2_class_does_not_error_without_sql_alchemy_installed( self, ): 
class MockSqliteConnection: - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: self.conn = sqlite3.Connection(*args, **kwargs) def __getattr__(self, name): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6c222669c37db..31f720b9ec336 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1999,7 +1999,7 @@ def test_pivot_number_of_levels_larger_than_int32(self, monkeypatch): # GH 20601 # GH 26314: Change ValueError to PerformanceWarning class MockUnstacker(reshape_lib._Unstacker): - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: # __init__ will raise the warning super().__init__(*args, **kwargs) raise Exception("Don't compute final result.") diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index ff1f6ad42feb3..74aa7f045088e 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -988,7 +988,7 @@ def test_compare_custom_object(self): """ class CustomClass: - def __init__(self, cmp_result=None): + def __init__(self, cmp_result=None) -> None: self.cmp_result = cmp_result def generic_result(self): diff --git a/pandas/tests/series/methods/test_is_unique.py b/pandas/tests/series/methods/test_is_unique.py index c696d365662ea..960057cb3d646 100644 --- a/pandas/tests/series/methods/test_is_unique.py +++ b/pandas/tests/series/methods/test_is_unique.py @@ -26,7 +26,7 @@ def test_is_unique(data, expected): def test_is_unique_class_ne(capsys): # GH#20661 class Foo: - def __init__(self, val): + def __init__(self, val) -> None: self._value = val def __ne__(self, other): diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 43d33f5b498bc..b8cee317af287 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -238,7 +238,7 @@ def 
test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc): def test_object_series_ok(): class Dummy: - def __init__(self, value): + def __init__(self, value) -> None: self.value = value def __add__(self, other): @@ -413,7 +413,7 @@ def test_binary_ufunc_other_types(type_): def test_object_dtype_ok(): class Thing: - def __init__(self, value): + def __init__(self, value) -> None: self.value = value def __add__(self, other): diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index 6e224245076ee..3e4e57414269a 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -14,7 +14,7 @@ class X(accessor.DirNamesMixin): x = 1 y: int - def __init__(self): + def __init__(self) -> None: self.z = 3 result = [attr_name for attr_name in dir(X()) if not attr_name.startswith("_")] @@ -38,7 +38,7 @@ def ensure_removed(obj, attr): class MyAccessor: - def __init__(self, obj): + def __init__(self, obj) -> None: self.obj = obj self.item = "item" @@ -102,7 +102,7 @@ def test_raises_attribute_error(): @pd.api.extensions.register_series_accessor("bad") class Bad: - def __init__(self, data): + def __init__(self, data) -> None: raise AttributeError("whoops") with pytest.raises(AttributeError, match="whoops"): diff --git a/pandas/tests/tseries/holiday/test_calendar.py b/pandas/tests/tseries/holiday/test_calendar.py index a1e3c1985a4d4..57acf15443ca8 100644 --- a/pandas/tests/tseries/holiday/test_calendar.py +++ b/pandas/tests/tseries/holiday/test_calendar.py @@ -50,7 +50,7 @@ def test_calendar_caching(): # see gh-9552. 
class TestCalendar(AbstractHolidayCalendar): - def __init__(self, name=None, rules=None): + def __init__(self, name=None, rules=None) -> None: super().__init__(name=name, rules=rules) jan1 = TestCalendar(rules=[Holiday("jan1", year=2015, month=1, day=1)]) From c31969265665efb32cdd49a5a55bca1f3c262455 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Wed, 9 Mar 2022 09:22:09 -0500 Subject: [PATCH 18/18] TYP: add type annotation to DataFrame.to_pickle (#46262) --- pandas/core/generic.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a0621e1ff9306..80af10383e24e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2956,7 +2956,7 @@ def to_sql( ) def to_pickle( self, - path, + path: FilePath | WriteBuffer[bytes], compression: CompressionOptions = "infer", protocol: int = pickle.HIGHEST_PROTOCOL, storage_options: StorageOptions = None, @@ -2966,8 +2966,10 @@ def to_pickle( Parameters ---------- - path : str - File path where the pickled object will be stored. + path : str, path object, or file-like object + String, path object (implementing ``os.PathLike[str]``), or file-like + object implementing a binary ``write()`` function. File path where + the pickled object will be stored. {compression_options} protocol : int Int which indicates which protocol should be used by the pickler,