From 96ec4204f561ef8f621bf47f141676b803ebfdec Mon Sep 17 00:00:00 2001
From: Laurent Mutricy
Date: Tue, 2 Apr 2024 14:08:50 +0200
Subject: [PATCH 1/3] adding tests for select_dtypes

---
Review notes (not part of the commit message):
  - Adds test_select_dtypes to tests/test_frame.py. The valid calls pass a
    string, a numpy type, a builtin type, keyword include/exclude (each also
    paired with an explicit None), and lists mixing strings with types; every
    call is asserted to be typed as pd.DataFrame.
  - Under TYPE_CHECKING_INVALID_USAGE, the empty-list case and the str case
    are left commented out, and the zero-argument call carries mypy/pyright
    ignore comments.

 tests/test_frame.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/tests/test_frame.py b/tests/test_frame.py
index 099d608b1..b1ffd22db 100644
--- a/tests/test_frame.py
+++ b/tests/test_frame.py
@@ -3154,6 +3154,58 @@ def test_convert_dtypes_dtype_backend() -> None:
     check(assert_type(dfn, pd.DataFrame), pd.DataFrame)
 
 
+def test_select_dtypes() -> None:
+    df = pd.DataFrame({"a": [1, 2] * 3, "b": [True, False] * 3, "c": [1.0, 2.0] * 3})
+    check(assert_type(df.select_dtypes("number"), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.select_dtypes(np.number), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.select_dtypes(object), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.select_dtypes(include="bool"), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(df.select_dtypes(include=["float64"], exclude=None), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(df.select_dtypes(exclude=["int64"], include=None), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(df.select_dtypes(exclude=["int64", object]), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(
+            df.select_dtypes(
+                exclude=[
+                    np.datetime64,
+                    "datetime64",
+                    "datetime",
+                    np.timedelta64,
+                    "timedelta",
+                    "timedelta64",
+                    "category",
+                    "datetimetz",
+                    "datetime64[ns]",
+                ]
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+    if TYPE_CHECKING_INVALID_USAGE:
+        # not able to check with typing that inputs lists are empty
+        # check(
+        # assert_type(df.select_dtypes([], []), pd.DataFrame), pd.DataFrame
+        # ) # ValueError
+
+        # ValueError :
+        check(assert_type(df.select_dtypes(), pd.DataFrame), pd.DataFrame) # type: ignore[assert-type, call-overload] # pyright: ignore[reportAssertTypeFailure, reportCallIssue]
+
+        # any kind of string dtype is not allowed but strings dtypes are included in AstypeArg...
+        # check(
+        # assert_type(df.select_dtypes(str), pd.DataFrame), pd.DataFrame
+        # ) # TypeError
+
+
 def test_to_json_mode() -> None:
     df = pd.DataFrame(
         [["a", "b"], ["c", "d"]],
From eb6ffa6663cbc9d46aef39a523f2c6e3d4e5967d Mon Sep 17 00:00:00 2001
From: Laurent Mutricy
Date: Tue, 2 Apr 2024 15:20:43 +0200
Subject: [PATCH 2/3] adding tests for select_dtypes, resolves #887

---
Review notes (not part of the commit message):
  - Despite the subject line, this commit changes only
    pandas-stubs/core/frame.pyi; no test file is touched.
  - Imports TypeAlias and declares two aliases inside the DataFrame class
    body: AstypeArgExt (AstypeArg plus seven extra string literals, e.g.
    "number") and AstypeArgExtList (one AstypeArgExt or a list of them).
  - Replaces the single select_dtypes signature, whose include and exclude
    both defaulted, with three overloads that each have one required
    parameter, so a zero-argument call matches none of them.
  - NOTE(review): in the third overload `exclude` is the first positional
    parameter; confirm whether it was meant to be keyword-only.

 pandas-stubs/core/frame.pyi | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
index 44b3a080e..69aeead1d 100644
--- a/pandas-stubs/core/frame.pyi
+++ b/pandas-stubs/core/frame.pyi
@@ -50,7 +50,10 @@ from pandas.core.window.rolling import (
     Rolling,
     Window,
 )
-from typing_extensions import Self
+from typing_extensions import (
+    Self,
+    TypeAlias,
+)
 import xarray as xr
 
 from pandas._libs.lib import NoDefault
@@ -608,10 +611,35 @@ class DataFrame(NDFrame, OpsMixin):
         self, expr: _str, *, inplace: Literal[False] = ..., **kwargs
     ) -> DataFrame: ...
     def eval(self, expr: _str, *, inplace: _bool = ..., **kwargs): ...
+    AstypeArgExt: TypeAlias = (
+        AstypeArg
+        | Literal[
+            "number",
+            "datetime64",
+            "datetime",
+            "timedelta",
+            "timedelta64",
+            "datetimetz",
+            "datetime64[ns]",
+        ]
+    )
+    AstypeArgExtList: TypeAlias = AstypeArgExt | list[AstypeArgExt]
+    @overload
+    def select_dtypes(
+        self,
+        include: AstypeArgExtList,
+        exclude: AstypeArgExtList | None = ...,
+    ) -> DataFrame: ...
+    @overload
+    def select_dtypes(
+        self,
+        include: AstypeArgExtList | None,
+        exclude: AstypeArgExtList,
+    ) -> DataFrame: ...
+    @overload
     def select_dtypes(
         self,
-        include: _str | list[_str] | None = ...,
-        exclude: _str | list[_str] | None = ...,
+        exclude: AstypeArgExtList,
     ) -> DataFrame: ...
     def insert(
         self,
From ec64188a168a37a858a4439886217a37c978d92d Mon Sep 17 00:00:00 2001
From: Laurent Mutricy
Date: Tue, 2 Apr 2024 21:20:10 +0200
Subject: [PATCH 3/3] better handling of exclusions in arguments

---
Review notes (not part of the commit message):
  - Imports Never and StrDtypeArg, then adds four select_dtypes overloads
    returning Never ahead of the DataFrame-returning ones: include typed as
    StrDtypeArg, exclude typed as StrDtypeArg (two spellings), and both
    arguments typed as list[Never].
  - In tests/test_frame.py, imports assert_never and replaces the
    commented-out checks and the ignored zero-argument call with five
    assert_never calls.
  - NOTE(review): assert_never(df.select_dtypes()) no longer carries ignore
    comments, yet no overload in this series accepts zero arguments; confirm
    the type checkers accept it inside the TYPE_CHECKING_INVALID_USAGE branch.

 pandas-stubs/core/frame.pyi | 14 ++++++++++++++
 tests/test_frame.py         | 20 ++++++++------------
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
index 69aeead1d..066bedeb4 100644
--- a/pandas-stubs/core/frame.pyi
+++ b/pandas-stubs/core/frame.pyi
@@ -51,6 +51,7 @@ from pandas.core.window.rolling import (
     Window,
 )
 from typing_extensions import (
+    Never,
     Self,
     TypeAlias,
 )
@@ -115,6 +116,7 @@ from pandas._typing import (
     SortKind,
     StataDateFormat,
     StorageOptions,
+    StrDtypeArg,
     StrLike,
     Suffixes,
     T as _T,
@@ -625,6 +627,18 @@ class DataFrame(NDFrame, OpsMixin):
     )
     AstypeArgExtList: TypeAlias = AstypeArgExt | list[AstypeArgExt]
     @overload
+    def select_dtypes(
+        self, include: StrDtypeArg, exclude: AstypeArgExtList | None = ...
+    ) -> Never: ...
+    @overload
+    def select_dtypes(
+        self, include: AstypeArgExtList | None, exclude: StrDtypeArg
+    ) -> Never: ...
+    @overload
+    def select_dtypes(self, exclude: StrDtypeArg) -> Never: ...
+    @overload
+    def select_dtypes(self, include: list[Never], exclude: list[Never]) -> Never: ...
+    @overload
     def select_dtypes(
         self,
         include: AstypeArgExtList,
diff --git a/tests/test_frame.py b/tests/test_frame.py
index b1ffd22db..0eb9b8bb3 100644
--- a/tests/test_frame.py
+++ b/tests/test_frame.py
@@ -38,6 +38,7 @@ import pytest
 
 from typing_extensions import (
     TypeAlias,
+    assert_never,
     assert_type,
 )
 import xarray as xr
@@ -3192,18 +3193,13 @@ def test_select_dtypes() -> None:
         pd.DataFrame,
     )
     if TYPE_CHECKING_INVALID_USAGE:
-        # not able to check with typing that inputs lists are empty
-        # check(
-        # assert_type(df.select_dtypes([], []), pd.DataFrame), pd.DataFrame
-        # ) # ValueError
-
-        # ValueError :
-        check(assert_type(df.select_dtypes(), pd.DataFrame), pd.DataFrame) # type: ignore[assert-type, call-overload] # pyright: ignore[reportAssertTypeFailure, reportCallIssue]
-
-        # any kind of string dtype is not allowed but strings dtypes are included in AstypeArg...
-        # check(
-        # assert_type(df.select_dtypes(str), pd.DataFrame), pd.DataFrame
-        # ) # TypeError
+        # include and exclude shall not be both empty
+        assert_never(df.select_dtypes([], []))
+        assert_never(df.select_dtypes())
+        # str like dtypes are not allowed
+        assert_never(df.select_dtypes(str))
+        assert_never(df.select_dtypes(exclude=str))
+        assert_never(df.select_dtypes(None, str))
 
 
 def test_to_json_mode() -> None: