From 829440d8949e31a2b147ce2a266d4765babd9534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 16:02:51 -0400 Subject: [PATCH 01/13] TYP: Series/DataFrame are not Hashable --- pandas-stubs/core/generic.pyi | 3 ++- tests/test_frame.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi index 71ab8de23..f0d44fd7e 100644 --- a/pandas-stubs/core/generic.pyi +++ b/pandas-stubs/core/generic.pyi @@ -2,6 +2,7 @@ import sys from typing import ( Any, Callable, + ClassVar, Dict, Hashable, Iterator, @@ -89,7 +90,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin): def bool(self) -> _bool: ... def __abs__(self) -> NDFrame: ... def __round__(self, decimals: int = ...) -> NDFrame: ... - def __hash__(self): ... + __hash__: ClassVar[None] # type: ignore[assignment] def __iter__(self) -> Iterator: ... def keys(self): ... def iteritems(self): ... diff --git a/tests/test_frame.py b/tests/test_frame.py index 31e768cb4..fdd807380 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1141,3 +1141,10 @@ def test_frame_ndarray_assignmment() -> None: df_b = pd.DataFrame({"a": [0.0] * 10, "b": [1.0] * 10}) df_b.iloc[:, :] = np.array([[-1.0, np.inf]] * 10) + +def test_not_hashable() -> None: + # GH 113 + assert_type(pd.DataFrame.__hash__, None) + assert_type(pd.DataFrame().__hash__, None) + assert_type(pd.Series.__hash__, None) + assert_type(pd.Series().__hash__, None) From 53bbb51a533d0c939eac5b84f3d8b20f61072d11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 16:15:28 -0400 Subject: [PATCH 02/13] explicitly set __hash__ also in the sub-classes (they inherit from classes that have proper __hash__) --- pandas-stubs/core/frame.pyi | 3 +++ pandas-stubs/core/series.pyi | 2 ++ tests/test_frame.py | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi 
index 21f73e136..fee00fb9b 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -5,6 +5,7 @@ import datetime as _dt from typing import ( Any, Callable, + ClassVar, Dict, Hashable, Iterable, @@ -183,6 +184,8 @@ class DataFrame(NDFrame, OpsMixin): Index, Series, ] + __hash__: ClassVar[None] # type: ignore[assignment] + def __new__( cls, data: Optional[Union[_ListLike, DataFrame, Dict[Any, Any]]] = ..., diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 4612a4f52..c74fc5a33 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -5,6 +5,7 @@ from datetime import ( from typing import ( Any, Callable, + ClassVar, Dict, Generic, Hashable, @@ -138,6 +139,7 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]): class Series(IndexOpsMixin, NDFrame, Generic[S1]): _ListLike = Union[ArrayLike, Dict[_str, np.ndarray], List, Tuple, Index] + __hash__: ClassVar[None] @overload def __new__( cls, diff --git a/tests/test_frame.py b/tests/test_frame.py index fdd807380..17d31f982 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1142,9 +1142,10 @@ def test_frame_ndarray_assignmment() -> None: df_b = pd.DataFrame({"a": [0.0] * 10, "b": [1.0] * 10}) df_b.iloc[:, :] = np.array([[-1.0, np.inf]] * 10) + def test_not_hashable() -> None: # GH 113 assert_type(pd.DataFrame.__hash__, None) assert_type(pd.DataFrame().__hash__, None) assert_type(pd.Series.__hash__, None) - assert_type(pd.Series().__hash__, None) + assert_type(pd.Series([], dtype=object).__hash__, None) From 4d57f05bd3f95e1cb02fd6be0d57b90ffc3930b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 17:52:31 -0400 Subject: [PATCH 03/13] cover a few more __hash__; and try to check types at runtime --- pandas-stubs/_typing.pyi | 2 +- pandas-stubs/core/frame.pyi | 6 +++--- pandas-stubs/core/indexes/base.pyi | 3 ++- pandas-stubs/core/indexes/frozen.pyi | 2 +- pandas-stubs/core/series.pyi | 1 + tests/__init__.py 
| 27 +++++++++++++++++++++++++++ tests/test_frame.py | 2 ++ tests/test_indexes.py | 4 ++-- 8 files changed, 39 insertions(+), 8 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 7d44d7649..ca5ce32ac 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -200,5 +200,5 @@ XMLParsers = Literal["lxml", "etree"] # Any plain Python or numpy function Function = Union[np.ufunc, Callable[..., Any]] GroupByObject = Union[ - Label, List[Label], Function, Series, np.ndarray, Mapping[Label, Any] + Label, List[Label], Function, Series, np.ndarray, Mapping[Label, Any], Index ] diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index fee00fb9b..f14f71da2 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -431,7 +431,7 @@ class DataFrame(NDFrame, OpsMixin): *, axis: Axis = ..., index: Hashable | Sequence[Hashable] = ..., - columns: Hashable | Sequence[Hashable] = ..., + columns: Hashable | Sequence[Hashable] | Index = ..., level: Optional[Level] = ..., inplace: Literal[True], errors: IgnoreRaise = ..., @@ -443,7 +443,7 @@ class DataFrame(NDFrame, OpsMixin): *, axis: Axis = ..., index: Hashable | Sequence[Hashable] = ..., - columns: Hashable | Sequence[Hashable] = ..., + columns: Hashable | Sequence[Hashable] | Index = ..., level: Optional[Level] = ..., inplace: Literal[False] = ..., errors: IgnoreRaise = ..., @@ -455,7 +455,7 @@ class DataFrame(NDFrame, OpsMixin): *, axis: Axis = ..., index: Hashable | Sequence[Hashable] = ..., - columns: Hashable | Sequence[Hashable] = ..., + columns: Hashable | Sequence[Hashable] | Index = ..., level: Optional[Level] = ..., inplace: bool = ..., errors: IgnoreRaise = ..., diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi index 3a893d9f3..f56c3a3aa 100644 --- a/pandas-stubs/core/indexes/base.pyi +++ b/pandas-stubs/core/indexes/base.pyi @@ -1,5 +1,6 @@ from typing import ( Callable, + ClassVar, Dict, Hashable, 
Iterable, @@ -169,7 +170,7 @@ class Index(IndexOpsMixin, PandasObject): def where(self, cond, other=...): ... def is_type_compatible(self, kind) -> bool: ... def __contains__(self, key) -> bool: ... - def __hash__(self) -> int: ... + __hash__: ClassVar[None] # type: ignore[assignment] def __setitem__(self, key, value) -> None: ... @overload def __getitem__( diff --git a/pandas-stubs/core/indexes/frozen.pyi b/pandas-stubs/core/indexes/frozen.pyi index 309777c86..e23878fa6 100644 --- a/pandas-stubs/core/indexes/frozen.pyi +++ b/pandas-stubs/core/indexes/frozen.pyi @@ -8,4 +8,4 @@ class FrozenList(PandasObject, list): def __eq__(self, other) -> bool: ... def __mul__(self, other): ... def __reduce__(self): ... - def __hash__(self): ... + def __hash__(self) -> int: ... # type: ignore[override] diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index c74fc5a33..9d73c4e1d 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -140,6 +140,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): _ListLike = Union[ArrayLike, Dict[_str, np.ndarray], List, Tuple, Index] __hash__: ClassVar[None] + @overload def __new__( cls, diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb..cb48b15a2 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,27 @@ +import typing_extensions +import types + + +def assert_type(actual_object, expected): + # rough check whether the types might match + + actual = type(actual_object).__name__.split(".")[-1] + error = False + + if isinstance(expected, types.GenericAlias): # type: ignore[attr-defined] + expected = expected.__name__ + + if isinstance(expected, str): + actual = actual.lower() + expected = expected.lower() + error = actual not in expected + elif expected is None: + error = actual_object is not None + else: + error = not isinstance(actual_object, expected) + + if error: + raise TypeError(f"Expected '{expected}' got '{actual}'") + + +typing_extensions.assert_type = 
assert_type diff --git a/tests/test_frame.py b/tests/test_frame.py index 17d31f982..990d6662f 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1149,3 +1149,5 @@ def test_not_hashable() -> None: assert_type(pd.DataFrame().__hash__, None) assert_type(pd.Series.__hash__, None) assert_type(pd.Series([], dtype=object).__hash__, None) + assert_type(pd.Index.__hash__, None) + assert_type(pd.Index([]).__hash__, None) diff --git a/tests/test_indexes.py b/tests/test_indexes.py index 11157e6e2..bc186ffb7 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -8,9 +8,9 @@ def test_index_unique() -> None: df = pd.DataFrame({"x": [1, 2, 3, 4]}, index=pd.Index([1, 2, 3, 2])) ind = df.index - assert_type(ind, "pd.Index") + assert_type(ind, pd.Index) i2 = ind.unique() - assert_type(i2, "pd.Index") + assert_type(i2, pd.Index) def test_index_isin() -> None: From 5ef1675010a7ec233f23b7dd49d3a35c5a155648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 17:57:47 -0400 Subject: [PATCH 04/13] just in case it would affect static type checking --- tests/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index cb48b15a2..896f203f8 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +1,7 @@ -import typing_extensions import types +from typing import TYPE_CHECKING + +import typing_extensions def assert_type(actual_object, expected): @@ -24,4 +26,5 @@ def assert_type(actual_object, expected): raise TypeError(f"Expected '{expected}' got '{actual}'") -typing_extensions.assert_type = assert_type +if not TYPE_CHECKING: + typing_extensions.assert_type = assert_type From 25b59a76f47b3f3a13b81118744fef6a012a1a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 18:03:04 -0400 Subject: [PATCH 05/13] cache only poetry.lock --- .github/workflows/test.yml | 16 +--------------- 1 file changed, 1 insertion(+), 15 
deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 49ee440c5..cb681cec7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,26 +26,12 @@ jobs: - name: Install Poetry run: pip install poetry - - name: Determine poetry cache-dir - run: | - echo "::set-output name=PATH::$(poetry config cache-dir)" - id: cache_path - - name: Cache project dependencies id : cache uses: actions/cache@v3 - with: - path: | - ${{ steps.cache_path.outputs.PATH }} - poetry.lock - key: ${{ matrix.os }}-${{ matrix.python-version }}-poetry-${{ hashFiles('pyproject.toml') }} - restore-keys: ${{ matrix.os }}-${{ matrix.python-version }}-poetry- - - - name: Delete poetry.lock on cache miss - if: steps.cache.outputs.cache-hit != 'true' - uses: JesseTG/rm@v1.0.3 with: path: poetry.lock + key: ${{ matrix.os }}-${{ matrix.python-version }}-poetry-${{ hashFiles('pyproject.toml') }} - name: Install project dependencies run: poetry install -vvv --no-root From 77fe1f001acb5706a579e32727c78af684af3ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 18:06:36 -0400 Subject: [PATCH 06/13] remove runtime check --- tests/__init__.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 896f203f8..e69de29bb 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,30 +0,0 @@ -import types -from typing import TYPE_CHECKING - -import typing_extensions - - -def assert_type(actual_object, expected): - # rough check whether the types might match - - actual = type(actual_object).__name__.split(".")[-1] - error = False - - if isinstance(expected, types.GenericAlias): # type: ignore[attr-defined] - expected = expected.__name__ - - if isinstance(expected, str): - actual = actual.lower() - expected = expected.lower() - error = actual not in expected - elif expected is None: - error = actual_object is not None - else: - error = not 
isinstance(actual_object, expected) - - if error: - raise TypeError(f"Expected '{expected}' got '{actual}'") - - -if not TYPE_CHECKING: - typing_extensions.assert_type = assert_type From 97535f1176d161b6d49a4eab710645dac5d66bf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 18:17:17 -0400 Subject: [PATCH 07/13] assert assert_type --- tests/test_frame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 990d6662f..25867fa36 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1145,9 +1145,9 @@ def test_frame_ndarray_assignmment() -> None: def test_not_hashable() -> None: # GH 113 - assert_type(pd.DataFrame.__hash__, None) - assert_type(pd.DataFrame().__hash__, None) - assert_type(pd.Series.__hash__, None) - assert_type(pd.Series([], dtype=object).__hash__, None) - assert_type(pd.Index.__hash__, None) - assert_type(pd.Index([]).__hash__, None) + assert assert_type(pd.DataFrame.__hash__, None) is None + assert assert_type(pd.DataFrame().__hash__, None) is None + assert assert_type(pd.Series.__hash__, None) is None + assert assert_type(pd.Series([], dtype=object).__hash__, None) is None + assert assert_type(pd.Index.__hash__, None) is None + assert assert_type(pd.Index([]).__hash__, None) is None From 069094b2b48cf435adcb37de4ce0adb5a3440f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 18:23:03 -0400 Subject: [PATCH 08/13] undo change for the runtime hack --- tests/test_indexes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_indexes.py b/tests/test_indexes.py index bc186ffb7..11157e6e2 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -8,9 +8,9 @@ def test_index_unique() -> None: df = pd.DataFrame({"x": [1, 2, 3, 4]}, index=pd.Index([1, 2, 3, 2])) ind = df.index - assert_type(ind, pd.Index) + assert_type(ind, "pd.Index") i2 = ind.unique() - 
assert_type(i2, pd.Index) + assert_type(i2, "pd.Index") def test_index_isin() -> None: From dc110fe5b31ebd1181c701b7b860bb7085afee65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 18:27:54 -0400 Subject: [PATCH 09/13] poetry.lock should not be dependent on the OS/python version --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cb681cec7..5ff17037a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,12 +26,12 @@ jobs: - name: Install Poetry run: pip install poetry - - name: Cache project dependencies + - name: Cache poetry.lock id : cache uses: actions/cache@v3 with: path: poetry.lock - key: ${{ matrix.os }}-${{ matrix.python-version }}-poetry-${{ hashFiles('pyproject.toml') }} + key: poetry-${{ hashFiles('pyproject.toml') }} - name: Install project dependencies run: poetry install -vvv --no-root From b24c300df91003ef0bec670a7b9479fb0446e70f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 18:32:19 -0400 Subject: [PATCH 10/13] Empty commit From f949971c65eb0982ddd1e0356d4c72cc2cbba79c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 20:27:32 -0400 Subject: [PATCH 11/13] move ClassVar to the top --- pandas-stubs/core/indexes/base.pyi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi index f56c3a3aa..dfbe9c757 100644 --- a/pandas-stubs/core/indexes/base.pyi +++ b/pandas-stubs/core/indexes/base.pyi @@ -42,6 +42,8 @@ class InvalidIndexError(Exception): ... _str = str class Index(IndexOpsMixin, PandasObject): + __hash__: ClassVar[None] # type: ignore[assignment] + def __new__( cls, data: Iterable = ..., @@ -170,7 +172,6 @@ class Index(IndexOpsMixin, PandasObject): def where(self, cond, other=...): ... 
def is_type_compatible(self, kind) -> bool: ... def __contains__(self, key) -> bool: ... - __hash__: ClassVar[None] # type: ignore[assignment] def __setitem__(self, key, value) -> None: ... @overload def __getitem__( From e61ae6fe5b06c2d7a6ae96813c8ae72fc73dd9fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 20:44:01 -0400 Subject: [PATCH 12/13] negative type tests --- tests/test_frame.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_frame.py b/tests/test_frame.py index 25867fa36..16a0a95c5 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -7,6 +7,7 @@ TYPE_CHECKING, Any, Dict, + Hashable, Iterable, List, Tuple, @@ -1151,3 +1152,10 @@ def test_not_hashable() -> None: assert assert_type(pd.Series([], dtype=object).__hash__, None) is None assert assert_type(pd.Index.__hash__, None) is None assert assert_type(pd.Index([]).__hash__, None) is None + + def test_func(h: Hashable): + pass + + test_func(pd.DataFrame()) # type: ignore[arg-type] + test_func(pd.Series([], dtype=object)) # type: ignore[arg-type] + test_func(pd.Index([])) # type: ignore[arg-type] From 5452279e7183c3b370bdde2db7b66e5a3a1ef70b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 7 Jul 2022 21:19:18 -0400 Subject: [PATCH 13/13] move to top --- pandas-stubs/core/generic.pyi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi index f0d44fd7e..058962d9d 100644 --- a/pandas-stubs/core/generic.pyi +++ b/pandas-stubs/core/generic.pyi @@ -49,6 +49,8 @@ _bool = bool _str = str class NDFrame(PandasObject, indexing.IndexingMixin): + __hash__: ClassVar[None] # type: ignore[assignment] + def __new__( cls, data: BlockManager, @@ -90,7 +92,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin): def bool(self) -> _bool: ... def __abs__(self) -> NDFrame: ... def __round__(self, decimals: int = ...) -> NDFrame: ... 
- __hash__: ClassVar[None] # type: ignore[assignment] def __iter__(self) -> Iterator: ... def keys(self): ... def iteritems(self): ...