From e2c11d0a7625e8782ee995bdb44bd0fd52892ae1 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2025 14:42:53 -0500 Subject: [PATCH 1/5] update mypy and allow timestamp slicing --- pandas-stubs/_libs/tslibs/timestamps.pyi | 5 ++++- pyproject.toml | 4 ++-- tests/test_frame.py | 14 ++++++-------- tests/test_series.py | 19 +++++++++++++++++++ 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/pandas-stubs/_libs/tslibs/timestamps.pyi b/pandas-stubs/_libs/tslibs/timestamps.pyi index 504b801cc..d99661b17 100644 --- a/pandas-stubs/_libs/tslibs/timestamps.pyi +++ b/pandas-stubs/_libs/tslibs/timestamps.pyi @@ -11,6 +11,7 @@ from time import struct_time from typing import ( ClassVar, Literal, + SupportsIndex, overload, ) @@ -48,7 +49,7 @@ _Nonexistent: TypeAlias = ( Literal["raise", "NaT", "shift_backward", "shift_forward"] | Timedelta | timedelta ) -class Timestamp(datetime): +class Timestamp(datetime, SupportsIndex): min: ClassVar[Timestamp] # pyright: ignore[reportIncompatibleVariableOverride] max: ClassVar[Timestamp] # pyright: ignore[reportIncompatibleVariableOverride] @@ -309,3 +310,5 @@ class Timestamp(datetime): @property def unit(self) -> TimeUnit: ... def as_unit(self, unit: TimeUnit, round_ok: bool = ...) -> Self: ... + # To support slicing + def __index__(self) -> int: ... 
diff --git a/pyproject.toml b/pyproject.toml index 11a3dd93b..c233df4c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,11 +33,11 @@ types-pytz = ">= 2022.1.1" numpy = ">= 1.23.5" [tool.poetry.group.dev.dependencies] -mypy = "1.13.0" +mypy = "1.14.1" pandas = "2.2.3" pyarrow = ">=10.0.1" pytest = ">=7.1.2" -pyright = ">= 1.1.390" +pyright = ">= 1.1.391" poethepoet = ">=0.16.5" loguru = ">=0.6.0" typing-extensions = ">=4.4.0" diff --git a/tests/test_frame.py b/tests/test_frame.py index 5ce029cfd..48230cf3b 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -2409,14 +2409,12 @@ def test_indexslice_getitem(): .set_index(["x", "y"]) ) ind = pd.Index([2, 3]) - # This next test is written this way to support both mypy 1.13 and newer - # versions of mypy and pyright that treat slice as a Generic due to - # a change in typeshed. - # Once pyright 1.1.390 and mypy 1.14 are released, the test can be - # reverted to the standard form. - # check(assert_type(pd.IndexSlice[ind, :], tuple["pd.Index[int]", slice]), tuple) - tmp = cast(tuple["pd.Index[int]", slice], pd.IndexSlice[ind, :]) # type: ignore[redundant-cast] - check(assert_type(tmp, tuple["pd.Index[int]", slice]), tuple) + check( + assert_type( + pd.IndexSlice[ind, :], tuple["pd.Index[int]", "slice[None, None, None]"] + ), + tuple, + ) check(assert_type(df.loc[pd.IndexSlice[ind, :]], pd.DataFrame), pd.DataFrame) check(assert_type(df.loc[pd.IndexSlice[1:2]], pd.DataFrame), pd.DataFrame) check( diff --git a/tests/test_series.py b/tests/test_series.py index 87055fb74..524d12d4b 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -3435,3 +3435,22 @@ def test_series_unique_timedelta() -> None: """Test type return of Series.unique on Series[timedeta64[ns]].""" sr = pd.Series([pd.Timedelta("1 days"), pd.Timedelta("3 days")]) check(assert_type(sr.unique(), TimedeltaArray), TimedeltaArray) + + +def test_slice_timestamp() -> None: + dti = pd.date_range("1/1/2025", "2/28/2025") + + s = pd.Series([i for 
i in range(len(dti))], index=dti) + + # For `s1`, see discussion in GH 397. Needs mypy fix. + # s1 = s.loc["2025-01-15":"2025-01-20"] + + # GH 397 + check( + assert_type( + s.loc[pd.Timestamp("2025-01-15") : pd.Timestamp("2025-01-20")], + "pd.Series[int]", + ), + pd.Series, + np.integer, + ) From dd8b73aaabc5c4d5740ce15f4d21194383dc34a8 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2025 15:14:31 -0500 Subject: [PATCH 2/5] fix for getitem with python 3.12 and mypy --- pandas-stubs/core/frame.pyi | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 10f127b19..1bb3b4e54 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -9,6 +9,7 @@ from collections.abc import ( ) import datetime as dt from re import Pattern +import sys from typing import ( Any, ClassVar, @@ -229,8 +230,32 @@ class _LocIndexerFrame(_LocIndexer): value: Scalar | NAType | NaTType | ArrayLike | Series | list | None, ) -> None: ... -class DataFrame(NDFrame, OpsMixin): - __hash__: ClassVar[None] # type: ignore[assignment] +# With mypy 1.14.1 and python 3.12, the second overload needs a type-ignore statement +if sys.version_info >= (3, 12): + class _GetItemHack: + @overload + def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + @overload + def __getitem__( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + self, key: Iterable[Hashable] | slice + ) -> DataFrame: ... + @overload + def __getitem__(self, key: Hashable) -> Series: ... + +else: + class _GetItemHack: + @overload + def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... 
# type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + @overload + def __getitem__( # pyright: ignore[reportOverlappingOverload] + self, key: Iterable[Hashable] | slice + ) -> DataFrame: ... + @overload + def __getitem__(self, key: Hashable) -> Series: ... + +class DataFrame(NDFrame, OpsMixin, _GetItemHack): + + __hash__: ClassVar[None] # type: ignore[assignment] # pyright: ignore[reportIncompatibleMethodOverride] @overload def __new__( @@ -607,14 +632,6 @@ class DataFrame(NDFrame, OpsMixin): @property def T(self) -> DataFrame: ... def __getattr__(self, name: str) -> Series: ... - @overload - def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] - @overload - def __getitem__( # pyright: ignore[reportOverlappingOverload] - self, key: Iterable[Hashable] | slice - ) -> DataFrame: ... - @overload - def __getitem__(self, key: Hashable) -> Series: ... def isetitem( self, loc: int | Sequence[int], value: Scalar | ArrayLike | list[Any] ) -> None: ... From a2ed7b55ac8ae4c6f70036142179f81555ef72af Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2025 15:26:24 -0500 Subject: [PATCH 3/5] allow apply to return an offset --- pandas-stubs/core/series.pyi | 8 ++++++++ tests/test_series.py | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index b91d3842b..6d104138d 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -1030,6 +1030,14 @@ class Series(IndexOpsMixin[S1], NDFrame): **kwds, ) -> Series: ... @overload + def apply( + self, + func: Callable[..., BaseOffset], + convertDType: _bool = ..., + args: tuple = ..., + **kwds, + ) -> OffsetSeries: ... 
+ @overload def apply( self, func: Callable[..., Series], diff --git a/tests/test_series.py b/tests/test_series.py index 524d12d4b..f710b931f 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -3454,3 +3454,14 @@ def test_slice_timestamp() -> None: pd.Series, np.integer, ) + + +def test_apply_dateoffset() -> None: + # GH 454 + months = [1, 2, 3] + s = pd.Series(months) + check( + assert_type(s.apply(lambda x: pd.DateOffset(months=x)), "OffsetSeries"), + pd.Series, + pd.DateOffset, + ) From e3d0a16f8bcd709035891a270653fd1b0c7d4010 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2025 15:52:59 -0500 Subject: [PATCH 4/5] allow loc setitem to accept a slice --- pandas-stubs/core/frame.pyi | 4 +++- pandas-stubs/core/series.pyi | 2 +- tests/test_frame.py | 25 +++++-------------------- tests/test_series.py | 31 +++++++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 1bb3b4e54..93bd98026 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -220,7 +220,9 @@ class _LocIndexerFrame(_LocIndexer): @overload def __setitem__( self, - idx: MaskType | StrLike | _IndexSliceTuple | list[ScalarT] | IndexingInt, + idx: ( + MaskType | StrLike | _IndexSliceTuple | list[ScalarT] | IndexingInt | slice + ), value: Scalar | NAType | NaTType | ArrayLike | Series | DataFrame | list | None, ) -> None: ... @overload diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 6d104138d..ab285c548 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -208,7 +208,7 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]): @overload def __setitem__( self, - idx: Index | MaskType, + idx: Index | MaskType | slice, value: S1 | ArrayLike | Series[S1] | None, ) -> None: ... 
@overload diff --git a/tests/test_frame.py b/tests/test_frame.py index 48230cf3b..25542cba9 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -7,7 +7,6 @@ Iterator, Mapping, MutableMapping, - Sequence, ) import csv import datetime @@ -39,7 +38,6 @@ from pandas.core.series import Series import pytest from typing_extensions import ( - Never, TypeAlias, assert_never, assert_type, @@ -3763,22 +3761,9 @@ def test_info() -> None: check(assert_type(df.info(show_counts=None), None), type(None)) -def test_series_typed_dict() -> None: - """Test that no error is raised when constructing a series from a typed dict.""" +def test_frame_single_slice() -> None: + # GH 572 + df = pd.DataFrame([1, 2, 3]) + check(assert_type(df.loc[:], pd.DataFrame), pd.DataFrame) - class MyDict(TypedDict): - a: str - b: str - - my_dict = MyDict(a="", b="") - sr = pd.Series(my_dict) - check(assert_type(sr, pd.Series), pd.Series) - - -def test_series_empty_dtype() -> None: - """Test for the creation of a Series from an empty list GH571 to map to a Series[Any].""" - new_tab: Sequence[Never] = [] # need to be typehinted to please mypy - check(assert_type(pd.Series(new_tab), "pd.Series[Any]"), pd.Series) - check(assert_type(pd.Series([]), "pd.Series[Any]"), pd.Series) - # ensure that an empty string does not get matched to Sequence[Never] - check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series) + df.loc[:] = 1 + df diff --git a/tests/test_series.py b/tests/test_series.py index f710b931f..4ff5b8a48 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -16,6 +16,7 @@ Any, Generic, Literal, + TypedDict, TypeVar, Union, cast, @@ -33,6 +34,7 @@ from pandas.core.window import ExponentialMovingWindow import pytest from typing_extensions import ( + Never, Self, TypeAlias, assert_never, @@ -3465,3 +3467,32 @@ def test_apply_dateoffset() -> None: pd.Series, pd.DateOffset, ) + + +def test_series_single_slice() -> None: + # GH 572 + s = pd.Series([1, 2, 3]) + check(assert_type(s.loc[:], 
"pd.Series[int]"), pd.Series, np.integer) + + s.loc[:] = 1 + s + + +def test_series_typed_dict() -> None: + """Test that no error is raised when constructing a series from a typed dict.""" + + class MyDict(TypedDict): + a: str + b: str + + my_dict = MyDict(a="", b="") + sr = pd.Series(my_dict) + check(assert_type(sr, pd.Series), pd.Series) + + +def test_series_empty_dtype() -> None: + """Test for the creation of a Series from an empty list GH571 to map to a Series[Any].""" + new_tab: Sequence[Never] = [] # need to be typehinted to please mypy + check(assert_type(pd.Series(new_tab), "pd.Series[Any]"), pd.Series) + check(assert_type(pd.Series([]), "pd.Series[Any]"), pd.Series) + # ensure that an empty string does not get matched to Sequence[Never] + check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series) From 34185303a835149be68b52e81979f9346ad51177 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2025 16:21:08 -0500 Subject: [PATCH 5/5] allow indexing with .loc using Timestamp --- pandas-stubs/core/frame.pyi | 8 +++++++- pandas-stubs/core/series.pyi | 4 ++-- tests/test_frame.py | 14 ++++++++++++++ tests/test_series.py | 9 +++++++++ 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 93bd98026..5ec5f45ef 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -113,6 +113,7 @@ from pandas._typing import ( ReplaceMethod, Scalar, ScalarT, + SequenceNotStr, SeriesByT, SortKind, StataDateFormat, @@ -194,7 +195,11 @@ class _LocIndexerFrame(_LocIndexer): def __getitem__( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] self, idx: tuple[ - int | StrLike | tuple[Scalar, ...] | Callable[[DataFrame], ScalarT], + int + | StrLike + | Timestamp + | tuple[Scalar, ...] + | Callable[[DataFrame], ScalarT], int | StrLike | tuple[Scalar, ...], ], ) -> Scalar: ... 
@@ -207,6 +212,7 @@ class _LocIndexerFrame(_LocIndexer): IndexType | MaskType | _IndexSliceTuple + | SequenceNotStr[float | str | Timestamp] | Callable[ [DataFrame], ScalarT | list[HashableT] | IndexType | MaskType ], diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index ab285c548..0d1cd084e 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -139,6 +139,7 @@ from pandas._typing import ( ReplaceMethod, Scalar, ScalarT, + SequenceNotStr, SeriesByT, SortKind, StrDtypeArg, @@ -195,8 +196,7 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]): idx: ( MaskType | Index - | Sequence[float] - | list[str] + | SequenceNotStr[float | str | Timestamp] | slice | _IndexSliceTuple | Sequence[_IndexSliceTuple] diff --git a/tests/test_frame.py b/tests/test_frame.py index 25542cba9..dec4e0f71 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -3767,3 +3767,17 @@ def test_frame_single_slice() -> None: check(assert_type(df.loc[:], pd.DataFrame), pd.DataFrame) df.loc[:] = 1 + df + + +def test_frame_index_timestamp() -> None: + # GH 620 + dt1 = pd.to_datetime("2023-05-01") + dt2 = pd.to_datetime("2023-05-02") + s = pd.Series([1, 2], index=[dt1, dt2]) + df = pd.DataFrame(s) + # Next result is Series or DataFrame because the index could be a MultiIndex + check(assert_type(df.loc[dt1, :], pd.Series | pd.DataFrame), pd.Series) + check(assert_type(df.loc[[dt1], :], pd.DataFrame), pd.DataFrame) + df2 = pd.DataFrame({"x": s}) + check(assert_type(df2.loc[dt1, "x"], Scalar), np.integer) + check(assert_type(df2.loc[[dt1], "x"], pd.Series), pd.Series, np.integer) diff --git a/tests/test_series.py b/tests/test_series.py index 4ff5b8a48..4efafd67c 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -3496,3 +3496,12 @@ def test_series_empty_dtype() -> None: check(assert_type(pd.Series([]), "pd.Series[Any]"), pd.Series) # ensure that an empty string does not get matched to Sequence[Never] check(assert_type(pd.Series(""), 
"pd.Series[str]"), pd.Series) + + +def test_series_index_timestamp() -> None: + # GH 620 + dt1 = pd.to_datetime("2023-05-01") + dt2 = pd.to_datetime("2023-05-02") + s = pd.Series([1, 2], index=[dt1, dt2]) + check(assert_type(s[dt1], int), np.integer) + check(assert_type(s.loc[[dt1]], "pd.Series[int]"), pd.Series, np.integer)