diff --git a/pandas-stubs/_libs/tslibs/timestamps.pyi b/pandas-stubs/_libs/tslibs/timestamps.pyi index 504b801cc..d99661b17 100644 --- a/pandas-stubs/_libs/tslibs/timestamps.pyi +++ b/pandas-stubs/_libs/tslibs/timestamps.pyi @@ -11,6 +11,7 @@ from time import struct_time from typing import ( ClassVar, Literal, + SupportsIndex, overload, ) @@ -48,7 +49,7 @@ _Nonexistent: TypeAlias = ( Literal["raise", "NaT", "shift_backward", "shift_forward"] | Timedelta | timedelta ) -class Timestamp(datetime): +class Timestamp(datetime, SupportsIndex): min: ClassVar[Timestamp] # pyright: ignore[reportIncompatibleVariableOverride] max: ClassVar[Timestamp] # pyright: ignore[reportIncompatibleVariableOverride] @@ -309,3 +310,5 @@ class Timestamp(datetime): @property def unit(self) -> TimeUnit: ... def as_unit(self, unit: TimeUnit, round_ok: bool = ...) -> Self: ... + # To support slicing + def __index__(self) -> int: ... diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 10f127b19..826ba3ad4 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -9,10 +9,12 @@ from collections.abc import ( ) import datetime as dt from re import Pattern +import sys from typing import ( Any, ClassVar, Literal, + NoReturn, overload, ) @@ -112,6 +114,7 @@ from pandas._typing import ( ReplaceMethod, Scalar, ScalarT, + SequenceNotStr, SeriesByT, SortKind, StataDateFormat, @@ -193,7 +196,11 @@ class _LocIndexerFrame(_LocIndexer): def __getitem__( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] self, idx: tuple[ - int | StrLike | tuple[Scalar, ...] | Callable[[DataFrame], ScalarT], + int + | StrLike + | Timestamp + | tuple[Scalar, ...] + | Callable[[DataFrame], ScalarT], int | StrLike | tuple[Scalar, ...], ], ) -> Scalar: ... @@ -206,6 +213,7 @@ class _LocIndexerFrame(_LocIndexer): IndexType | MaskType | _IndexSliceTuple + | SequenceNotStr[float | str | Timestamp] | Callable[ [DataFrame], ScalarT | list[HashableT] | IndexType | MaskType ], @@ -219,7 +227,9 @@ class _LocIndexerFrame(_LocIndexer): @overload def __setitem__( self, - idx: MaskType | StrLike | _IndexSliceTuple | list[ScalarT] | IndexingInt, + idx: ( + MaskType | StrLike | _IndexSliceTuple | list[ScalarT] | IndexingInt | slice + ), value: Scalar | NAType | NaTType | ArrayLike | Series | DataFrame | list | None, ) -> None: ... @overload @@ -229,8 +239,32 @@ class _LocIndexerFrame(_LocIndexer): value: Scalar | NAType | NaTType | ArrayLike | Series | list | None, ) -> None: ... -class DataFrame(NDFrame, OpsMixin): - __hash__: ClassVar[None] # type: ignore[assignment] +# With mypy 1.14.1 and python 3.12, the second overload needs a type-ignore statement +if sys.version_info >= (3, 12): + class _GetItemHack: + @overload + def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + @overload + def __getitem__( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + self, key: Iterable[Hashable] | slice + ) -> DataFrame: ... + @overload + def __getitem__(self, key: Hashable) -> Series: ... + +else: + class _GetItemHack: + @overload + def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + @overload + def __getitem__( # pyright: ignore[reportOverlappingOverload] + self, key: Iterable[Hashable] | slice + ) -> DataFrame: ... + @overload + def __getitem__(self, key: Hashable) -> Series: ... + +class DataFrame(NDFrame, OpsMixin, _GetItemHack): + + __hash__: ClassVar[None] # type: ignore[assignment] # pyright: ignore[reportIncompatibleMethodOverride] @overload def __new__( @@ -607,14 +641,6 @@ class DataFrame(NDFrame, OpsMixin): @property def T(self) -> DataFrame: ... def __getattr__(self, name: str) -> Series: ... - @overload - def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] - @overload - def __getitem__( # pyright: ignore[reportOverlappingOverload] - self, key: Iterable[Hashable] | slice - ) -> DataFrame: ... - @overload - def __getitem__(self, key: Hashable) -> Series: ... def isetitem( self, loc: int | Sequence[int], value: Scalar | ArrayLike | list[Any] ) -> None: ... @@ -2453,6 +2479,7 @@ class DataFrame(NDFrame, OpsMixin): ) -> Self: ... def __truediv__(self, other: float | DataFrame | Series | Sequence) -> Self: ... def __rtruediv__(self, other: float | DataFrame | Series | Sequence) -> Self: ... + def __bool__(self) -> NoReturn: ... class _PandasNamedTuple(tuple[Any, ...]): def __getattr__(self, field: str) -> Scalar: ... diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index b91d3842b..bc1056af1 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -13,11 +13,13 @@ from datetime import ( time, timedelta, ) +from pathlib import Path from typing import ( Any, ClassVar, Generic, Literal, + NoReturn, overload, ) @@ -139,6 +141,7 @@ from pandas._typing import ( ReplaceMethod, Scalar, ScalarT, + SequenceNotStr, SeriesByT, SortKind, StrDtypeArg, @@ -195,8 +198,7 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]): idx: ( MaskType | Index - | Sequence[float] - | list[str] + | SequenceNotStr[float | str | Timestamp] | slice | _IndexSliceTuple | Sequence[_IndexSliceTuple] @@ -208,7 +210,7 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]): @overload def __setitem__( self, - idx: Index | MaskType, + idx: Index | MaskType | slice, value: S1 | ArrayLike | Series[S1] | None, ) -> None: ... @overload @@ -1030,6 +1032,14 @@ class Series(IndexOpsMixin[S1], NDFrame): **kwds, ) -> Series: ... @overload + def apply( + self, + func: Callable[..., BaseOffset], + convertDType: _bool = ..., + args: tuple = ..., + **kwds, + ) -> OffsetSeries: ... + @overload def apply( self, func: Callable[..., Series], @@ -1640,6 +1650,9 @@ class Series(IndexOpsMixin[S1], NDFrame): self, other: int | np_ndarray_anyint | Series[int] ) -> Series[int]: ... def __rsub__(self, other: num | _ListLike | Series[S1]) -> Series: ... + @overload + def __rtruediv__(self, other: Path) -> Series: ... + @overload def __rtruediv__(self, other: num | _ListLike | Series[S1]) -> Series: ... # ignore needed for mypy as we want different results based on the arguments @overload # type: ignore[override] @@ -1666,6 +1679,9 @@ class Series(IndexOpsMixin[S1], NDFrame): ) -> TimedeltaSeries: ... @overload def __sub__(self, other: num | _ListLike | Series) -> Series: ... + @overload + def __truediv__(self, other: Path) -> Series: ... + @overload def __truediv__(self, other: num | _ListLike | Series[S1]) -> Series: ... # ignore needed for mypy as we want different results based on the arguments @overload # type: ignore[override] @@ -2144,6 +2160,7 @@ class Series(IndexOpsMixin[S1], NDFrame): level: Level | None = ..., drop_level: _bool = ..., ) -> Self: ... + def __bool__(self) -> NoReturn: ... class TimestampSeries(Series[Timestamp]): @property diff --git a/pyproject.toml b/pyproject.toml index 11a3dd93b..c233df4c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,11 +33,11 @@ types-pytz = ">= 2022.1.1" numpy = ">= 1.23.5" [tool.poetry.group.dev.dependencies] -mypy = "1.13.0" +mypy = "1.14.1" pandas = "2.2.3" pyarrow = ">=10.0.1" pytest = ">=7.1.2" -pyright = ">= 1.1.390" +pyright = ">= 1.1.391" poethepoet = ">=0.16.5" loguru = ">=0.6.0" typing-extensions = ">=4.4.0" diff --git a/tests/test_frame.py b/tests/test_frame.py index 5ce029cfd..3ea1a6cef 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -7,7 +7,6 @@ Iterator, Mapping, MutableMapping, - Sequence, ) import csv import datetime @@ -39,7 +38,6 @@ from pandas.core.series import Series import pytest from typing_extensions import ( - Never, TypeAlias, assert_never, assert_type, @@ -2409,14 +2407,12 @@ def test_indexslice_getitem(): .set_index(["x", "y"]) ) ind = pd.Index([2, 3]) - # This next test is written this way to support both mypy 1.13 and newer - # versions of mypy and pyright that treat slice as a Generic due to - # a change in typeshed. - # Once pyright 1.1.390 and mypy 1.14 are released, the test can be - # reverted to the standard form. - # check(assert_type(pd.IndexSlice[ind, :], tuple["pd.Index[int]", slice]), tuple) - tmp = cast(tuple["pd.Index[int]", slice], pd.IndexSlice[ind, :]) # type: ignore[redundant-cast] - check(assert_type(tmp, tuple["pd.Index[int]", slice]), tuple) + check( + assert_type( + pd.IndexSlice[ind, :], tuple["pd.Index[int]", "slice[None, None, None]"] + ), + tuple, + ) check(assert_type(df.loc[pd.IndexSlice[ind, :]], pd.DataFrame), pd.DataFrame) check(assert_type(df.loc[pd.IndexSlice[1:2]], pd.DataFrame), pd.DataFrame) check( @@ -3765,22 +3761,38 @@ def test_info() -> None: check(assert_type(df.info(show_counts=None), None), type(None)) -def test_series_typed_dict() -> None: - """Test that no error is raised when constructing a series from a typed dict.""" +def test_frame_single_slice() -> None: + # GH 572 + df = pd.DataFrame([1, 2, 3]) + check(assert_type(df.loc[:], pd.DataFrame), pd.DataFrame) - class MyDict(TypedDict): - a: str - b: str + df.loc[:] = 1 + df - my_dict = MyDict(a="", b="") - sr = pd.Series(my_dict) - check(assert_type(sr, pd.Series), pd.Series) +def test_frame_index_timestamp() -> None: + # GH 620 + dt1 = pd.to_datetime("2023-05-01") + dt2 = pd.to_datetime("2023-05-02") + s = pd.Series([1, 2], index=[dt1, dt2]) + df = pd.DataFrame(s) + # Next result is Series or DataFrame because the index could be a MultiIndex + check(assert_type(df.loc[dt1, :], pd.Series | pd.DataFrame), pd.Series) + check(assert_type(df.loc[[dt1], :], pd.DataFrame), pd.DataFrame) + df2 = pd.DataFrame({"x": s}) + check(assert_type(df2.loc[dt1, "x"], Scalar), np.integer) + check(assert_type(df2.loc[[dt1], "x"], pd.Series), pd.Series, np.integer) -def test_series_empty_dtype() -> None: - """Test for the creation of a Series from an empty list GH571 to map to a Series[Any].""" - new_tab: Sequence[Never] = [] # need to be typehinted to please mypy - check(assert_type(pd.Series(new_tab), "pd.Series[Any]"), pd.Series) - check(assert_type(pd.Series([]), "pd.Series[Any]"), pd.Series) - # ensure that an empty string does not get matched to Sequence[Never] - check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series) + +def test_frame_bool_fails() -> None: + # GH 663 + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + try: + # We want the type checker to tell us the next line is invalid + # mypy doesn't seem to figure that out, but pyright does + if df == "foo": # pyright: ignore[reportGeneralTypeIssues] + # Next line is unreachable. + s = df["a"] + except ValueError: + pass diff --git a/tests/test_series.py b/tests/test_series.py index 87055fb74..e6401cd81 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -16,6 +16,7 @@ Any, Generic, Literal, + TypedDict, TypeVar, Union, cast, @@ -33,6 +34,7 @@ from pandas.core.window import ExponentialMovingWindow import pytest from typing_extensions import ( + Never, Self, TypeAlias, assert_never, @@ -3435,3 +3437,95 @@ def test_series_unique_timedelta() -> None: """Test type return of Series.unique on Series[timedeta64[ns]].""" sr = pd.Series([pd.Timedelta("1 days"), pd.Timedelta("3 days")]) check(assert_type(sr.unique(), TimedeltaArray), TimedeltaArray) + + +def test_slice_timestamp() -> None: + dti = pd.date_range("1/1/2025", "2/28/2025") + + s = pd.Series([i for i in range(len(dti))], index=dti) + + # For `s1`, see discussion in GH 397. Needs mypy fix. + # s1 = s.loc["2025-01-15":"2025-01-20"] + + # GH 397 + check( + assert_type( + s.loc[pd.Timestamp("2025-01-15") : pd.Timestamp("2025-01-20")], + "pd.Series[int]", + ), + pd.Series, + np.integer, + ) + + +def test_apply_dateoffset() -> None: + # GH 454 + months = [1, 2, 3] + s = pd.Series(months) + check( + assert_type(s.apply(lambda x: pd.DateOffset(months=x)), "OffsetSeries"), + pd.Series, + pd.DateOffset, + ) + + +def test_series_single_slice() -> None: + # GH 572 + s = pd.Series([1, 2, 3]) + check(assert_type(s.loc[:], "pd.Series[int]"), pd.Series, np.integer) + + s.loc[:] = 1 + s + + +def test_series_typed_dict() -> None: + """Test that no error is raised when constructing a series from a typed dict.""" + + class MyDict(TypedDict): + a: str + b: str + + my_dict = MyDict(a="", b="") + sr = pd.Series(my_dict) + check(assert_type(sr, pd.Series), pd.Series) + + +def test_series_empty_dtype() -> None: + """Test for the creation of a Series from an empty list GH571 to map to a Series[Any].""" + new_tab: Sequence[Never] = [] # need to be typehinted to please mypy + check(assert_type(pd.Series(new_tab), "pd.Series[Any]"), pd.Series) + check(assert_type(pd.Series([]), "pd.Series[Any]"), pd.Series) + # ensure that an empty string does not get matched to Sequence[Never] + check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series) + + +def test_series_index_timestamp() -> None: + # GH 620 + dt1 = pd.to_datetime("2023-05-01") + dt2 = pd.to_datetime("2023-05-02") + s = pd.Series([1, 2], index=[dt1, dt2]) + check(assert_type(s[dt1], int), np.integer) + check(assert_type(s.loc[[dt1]], "pd.Series[int]"), pd.Series, np.integer) + + +def test_series_bool_fails() -> None: + # GH 663 + s = pd.Series([1, 2, 3]) + + try: + # We want the type checker to tell us the next line is invalid + # mypy doesn't seem to figure that out, but pyright does + if s == "foo": # pyright: ignore[reportGeneralTypeIssues] + # Next line is unreachable. + a = s[0] + except ValueError: + pass + + +def test_path_div() -> None: + # GH 682 + folder = Path.cwd() + files = pd.Series(["a.png", "b.png"]) + check(assert_type(folder / files, pd.Series), pd.Series, Path) + + folders = pd.Series([folder, folder]) + check(assert_type(folders / Path("a.png"), pd.Series), pd.Series, Path)