Skip to content

Commit e35c3ca

Browse files
authored
Rework groupby and resample core modules (#848)
* Rework groupby and resample core modules
* Runtime Series is not generic
* Remove default values and deprecated
* Use np.integer in tests
* Add a comment for Incomplete
* Remove private objects
* Remove deprecated Resampler.fillna
* Remove private constructors
* Remove more private constructors
* Temporarily type labelsize as int
  Needs fix everywhere and the upstream docs should be updated
* Tighten rolling and expanding method type
* Remove step from groupby rolling
* Fix resample rule
* Fix groupby fillna
* Add missing test for linked issue
* Remove pandas.core.apply as it is not used in public code
* Address CR
* Address remaining CR
* revert pyproject change
* Temporarily pin pyright to unblock CI
* New deprecations
* Windows tests
* Apply suggestions from code review
1 parent 56eafc1 commit e35c3ca

28 files changed

+2325
-851
lines changed

pandas-stubs/_libs/properties.pyi

+4-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@ class CachedProperty:
55
def __get__(self, obj, typ): ...
66
def __set__(self, obj, value) -> None: ...
77

8-
cache_readonly: CachedProperty = ...
8+
# note: this is a lie to make type checkers happy (they special
9+
# case property). cache_readonly uses attribute names similar to
10+
# property (fget) but it does not provide fset and fdel.
11+
cache_readonly = property
912

1013
class AxisProperty:
1114
def __init__(self, axis: int = ..., doc: str = ...) -> None: ...

pandas-stubs/_typing.pyi

+20
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ from pandas.core.dtypes.dtypes import (
4848

4949
from pandas.io.formats.format import EngFormatter
5050

51+
# `Incomplete` is equivalent to `Any`. Use it to annotate symbols that you don't
52+
# know the type of yet and that should be changed in the future. Use `Any` only
53+
# where it is the only acceptable type.
54+
Incomplete: TypeAlias = Any
55+
5156
ArrayLike: TypeAlias = ExtensionArray | np.ndarray
5257
AnyArrayLike: TypeAlias = Index | Series | np.ndarray
5358
PythonScalar: TypeAlias = str | bool | complex
@@ -80,6 +85,10 @@ class FulldatetimeDict(YearMonthDayDict, total=False):
8085
us: DatetimeDictArg
8186
ns: DatetimeDictArg
8287

88+
CorrelationMethod: TypeAlias = (
89+
Literal["pearson", "kendall", "spearman"]
90+
| Callable[[np.ndarray, np.ndarray], float]
91+
)
8392
# dtypes
8493
NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | object]
8594
Dtype: TypeAlias = ExtensionDtype | NpDtype
@@ -444,6 +453,7 @@ class SequenceNotStr(Protocol[_T_co]):
444453
IndexLabel: TypeAlias = Hashable | Sequence[Hashable]
445454
Label: TypeAlias = Hashable | None
446455
Level: TypeAlias = Hashable | int
456+
Shape: TypeAlias = tuple[int, ...]
447457
Suffixes: TypeAlias = tuple[str | None, str | None]
448458
Ordered: TypeAlias = bool | None
449459
JSONSerializable: TypeAlias = PythonScalar | list | dict
@@ -469,8 +479,11 @@ AggFuncTypeSeriesToFrame: TypeAlias = list[AggFuncTypeBase] | AggFuncTypeDictSer
469479
AggFuncTypeFrame: TypeAlias = (
470480
AggFuncTypeBase | list[AggFuncTypeBase] | AggFuncTypeDictFrame
471481
)
482+
AggFuncTypeDict: TypeAlias = AggFuncTypeDictSeries | AggFuncTypeDictFrame
483+
AggFuncType: TypeAlias = AggFuncTypeBase | list[AggFuncTypeBase] | AggFuncTypeDict
472484

473485
num: TypeAlias = complex
486+
AxisInt: TypeAlias = int
474487
AxisIndex: TypeAlias = Literal["index", 0]
475488
AxisColumn: TypeAlias = Literal["columns", 1]
476489
Axis: TypeAlias = AxisIndex | AxisColumn
@@ -563,6 +576,9 @@ IndexT = TypeVar("IndexT", bound=Index)
563576
IntervalT = TypeVar("IntervalT", bound=Interval)
564577
IntervalClosedType: TypeAlias = Literal["left", "right", "both", "neither"]
565578

579+
ScalarIndexer: TypeAlias = int | np.integer
580+
SequenceIndexer: TypeAlias = slice | list[int] | np.ndarray
581+
PositionalIndexer: TypeAlias = ScalarIndexer | SequenceIndexer
566582
TakeIndexer: TypeAlias = Sequence[int] | Sequence[np.integer] | npt.NDArray[np.integer]
567583

568584
IgnoreRaiseCoerce: TypeAlias = Literal["ignore", "raise", "coerce"]
@@ -758,5 +774,9 @@ RandomState: TypeAlias = (
758774
| np.random.BitGenerator
759775
| np.random.RandomState
760776
)
777+
Frequency: TypeAlias = str | BaseOffset
778+
TimeGrouperOrigin: TypeAlias = (
779+
Timestamp | Literal["epoch", "start", "start_day", "end", "end_day"]
780+
)
761781

762782
__all__ = ["npt", "type_t"]

pandas-stubs/core/base.pyi

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1-
from collections.abc import Iterator
1+
from collections.abc import (
2+
Hashable,
3+
Iterator,
4+
)
25
from typing import (
6+
Any,
37
Generic,
48
Literal,
9+
final,
510
)
611

712
import numpy as np
@@ -19,13 +24,19 @@ from pandas._typing import (
1924
Scalar,
2025
npt,
2126
)
27+
from pandas.util._decorators import cache_readonly
2228

2329
class NoNewAttributesMixin:
24-
def __setattr__(self, key, value) -> None: ...
30+
def __setattr__(self, key: str, value: Any) -> None: ...
2531

2632
class SelectionMixin(Generic[NDFrameT]):
33+
obj: NDFrameT
34+
exclusions: frozenset[Hashable]
35+
@final
36+
@cache_readonly
2737
def ndim(self) -> int: ...
2838
def __getitem__(self, key): ...
39+
def aggregate(self, func, *args, **kwargs): ...
2940

3041
class IndexOpsMixin(OpsMixin, Generic[S1]):
3142
__array_priority__: int = ...

pandas-stubs/core/frame.pyi

+17-40
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ from pandas.core.indexing import (
4141
_LocIndexer,
4242
)
4343
from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg
44-
from pandas.core.resample import Resampler
4544
from pandas.core.series import Series
4645
from pandas.core.window import (
4746
Expanding,
@@ -54,6 +53,7 @@ from pandas.core.window.rolling import (
5453
from typing_extensions import Self
5554
import xarray as xr
5655

56+
from pandas._libs.lib import NoDefault
5757
from pandas._libs.missing import NAType
5858
from pandas._libs.tslibs import BaseOffset
5959
from pandas._libs.tslibs.nattype import NaTType
@@ -1006,104 +1006,96 @@ class DataFrame(NDFrame, OpsMixin):
10061006
def groupby(
10071007
self,
10081008
by: Scalar,
1009-
axis: AxisIndex = ...,
1009+
axis: AxisIndex | NoDefault = ...,
10101010
level: IndexLabel | None = ...,
10111011
as_index: _bool = ...,
10121012
sort: _bool = ...,
10131013
group_keys: _bool = ...,
1014-
squeeze: _bool = ...,
1015-
observed: _bool = ...,
1014+
observed: _bool | NoDefault = ...,
10161015
dropna: _bool = ...,
10171016
) -> DataFrameGroupBy[Scalar]: ...
10181017
@overload
10191018
def groupby(
10201019
self,
10211020
by: DatetimeIndex,
1022-
axis: AxisIndex = ...,
1021+
axis: AxisIndex | NoDefault = ...,
10231022
level: IndexLabel | None = ...,
10241023
as_index: _bool = ...,
10251024
sort: _bool = ...,
10261025
group_keys: _bool = ...,
1027-
squeeze: _bool = ...,
1028-
observed: _bool = ...,
1026+
observed: _bool | NoDefault = ...,
10291027
dropna: _bool = ...,
10301028
) -> DataFrameGroupBy[Timestamp]: ...
10311029
@overload
10321030
def groupby(
10331031
self,
10341032
by: TimedeltaIndex,
1035-
axis: AxisIndex = ...,
1033+
axis: AxisIndex | NoDefault = ...,
10361034
level: IndexLabel | None = ...,
10371035
as_index: _bool = ...,
10381036
sort: _bool = ...,
10391037
group_keys: _bool = ...,
1040-
squeeze: _bool = ...,
1041-
observed: _bool = ...,
1038+
observed: _bool | NoDefault = ...,
10421039
dropna: _bool = ...,
10431040
) -> DataFrameGroupBy[Timedelta]: ...
10441041
@overload
10451042
def groupby(
10461043
self,
10471044
by: PeriodIndex,
1048-
axis: AxisIndex = ...,
1045+
axis: AxisIndex | NoDefault = ...,
10491046
level: IndexLabel | None = ...,
10501047
as_index: _bool = ...,
10511048
sort: _bool = ...,
10521049
group_keys: _bool = ...,
1053-
squeeze: _bool = ...,
1054-
observed: _bool = ...,
1050+
observed: _bool | NoDefault = ...,
10551051
dropna: _bool = ...,
10561052
) -> DataFrameGroupBy[Period]: ...
10571053
@overload
10581054
def groupby(
10591055
self,
10601056
by: IntervalIndex[IntervalT],
1061-
axis: AxisIndex = ...,
1057+
axis: AxisIndex | NoDefault = ...,
10621058
level: IndexLabel | None = ...,
10631059
as_index: _bool = ...,
10641060
sort: _bool = ...,
10651061
group_keys: _bool = ...,
1066-
squeeze: _bool = ...,
1067-
observed: _bool = ...,
1062+
observed: _bool | NoDefault = ...,
10681063
dropna: _bool = ...,
10691064
) -> DataFrameGroupBy[IntervalT]: ...
10701065
@overload
10711066
def groupby(
10721067
self,
10731068
by: MultiIndex | GroupByObjectNonScalar | None = ...,
1074-
axis: AxisIndex = ...,
1069+
axis: AxisIndex | NoDefault = ...,
10751070
level: IndexLabel | None = ...,
10761071
as_index: _bool = ...,
10771072
sort: _bool = ...,
10781073
group_keys: _bool = ...,
1079-
squeeze: _bool = ...,
1080-
observed: _bool = ...,
1074+
observed: _bool | NoDefault = ...,
10811075
dropna: _bool = ...,
10821076
) -> DataFrameGroupBy[tuple]: ...
10831077
@overload
10841078
def groupby(
10851079
self,
10861080
by: Series[SeriesByT],
1087-
axis: AxisIndex = ...,
1081+
axis: AxisIndex | NoDefault = ...,
10881082
level: IndexLabel | None = ...,
10891083
as_index: _bool = ...,
10901084
sort: _bool = ...,
10911085
group_keys: _bool = ...,
1092-
squeeze: _bool = ...,
1093-
observed: _bool = ...,
1086+
observed: _bool | NoDefault = ...,
10941087
dropna: _bool = ...,
10951088
) -> DataFrameGroupBy[SeriesByT]: ...
10961089
@overload
10971090
def groupby(
10981091
self,
10991092
by: CategoricalIndex | Index | Series,
1100-
axis: AxisIndex = ...,
1093+
axis: AxisIndex | NoDefault = ...,
11011094
level: IndexLabel | None = ...,
11021095
as_index: _bool = ...,
11031096
sort: _bool = ...,
11041097
group_keys: _bool = ...,
1105-
squeeze: _bool = ...,
1106-
observed: _bool = ...,
1098+
observed: _bool | NoDefault = ...,
11071099
dropna: _bool = ...,
11081100
) -> DataFrameGroupBy[Any]: ...
11091101
def pivot(
@@ -1921,21 +1913,6 @@ class DataFrame(NDFrame, OpsMixin):
19211913
*,
19221914
inplace: Literal[False] = ...,
19231915
) -> DataFrame: ...
1924-
def resample(
1925-
self,
1926-
rule,
1927-
axis: Axis = ...,
1928-
closed: _str | None = ...,
1929-
label: _str | None = ...,
1930-
convention: TimestampConvention = ...,
1931-
kind: Literal["timestamp", "period"] | None = ...,
1932-
on: _str | None = ...,
1933-
level: Level | None = ...,
1934-
origin: Timestamp
1935-
| Literal["epoch", "start", "start_day", "end", "end_day"] = ...,
1936-
offset: dt.timedelta | Timedelta | _str | None = ...,
1937-
group_keys: _bool = ...,
1938-
) -> Resampler[DataFrame]: ...
19391916
def rfloordiv(
19401917
self,
19411918
other,

pandas-stubs/core/generic.pyi

+23
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ from collections.abc import (
55
Mapping,
66
Sequence,
77
)
8+
import datetime as dt
89
import sqlite3
910
from typing import (
1011
Any,
@@ -17,13 +18,15 @@ from typing import (
1718
import numpy as np
1819
from pandas import Index
1920
import pandas.core.indexing as indexing
21+
from pandas.core.resample import DatetimeIndexResampler
2022
from pandas.core.series import Series
2123
import sqlalchemy.engine
2224
from typing_extensions import (
2325
Concatenate,
2426
Self,
2527
)
2628

29+
from pandas._libs.lib import NoDefault
2730
from pandas._typing import (
2831
S1,
2932
ArrayLike,
@@ -37,6 +40,7 @@ from pandas._typing import (
3740
FilePath,
3841
FileWriteMode,
3942
FillnaOptions,
43+
Frequency,
4044
HashableT1,
4145
HashableT2,
4246
HDFCompLib,
@@ -48,6 +52,10 @@ from pandas._typing import (
4852
SortKind,
4953
StorageOptions,
5054
T,
55+
TimedeltaConvertibleTypes,
56+
TimeGrouperOrigin,
57+
TimestampConvention,
58+
TimestampConvertibleTypes,
5159
WriteBuffer,
5260
)
5361

@@ -432,6 +440,21 @@ class NDFrame(indexing.IndexingMixin):
432440
end_time,
433441
axis=...,
434442
) -> Self: ...
443+
@final
444+
def resample(
445+
self,
446+
rule: Frequency | dt.timedelta,
447+
axis: Axis | NoDefault = ...,
448+
closed: Literal["right", "left"] | None = ...,
449+
label: Literal["right", "left"] | None = ...,
450+
convention: TimestampConvention = ...,
451+
kind: Literal["period", "timestamp"] | None = ...,
452+
on: Level | None = ...,
453+
level: Level | None = ...,
454+
origin: TimeGrouperOrigin | TimestampConvertibleTypes = ...,
455+
offset: TimedeltaConvertibleTypes | None = ...,
456+
group_keys: _bool = ...,
457+
) -> DatetimeIndexResampler[Self]: ...
435458
def first(self, offset) -> Self: ...
436459
def last(self, offset) -> Self: ...
437460
def rank(
+14-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,15 @@
1-
from pandas.core.groupby.generic import NamedAgg as NamedAgg
1+
from pandas.core.groupby.generic import (
2+
DataFrameGroupBy as DataFrameGroupBy,
3+
NamedAgg as NamedAgg,
4+
SeriesGroupBy as SeriesGroupBy,
5+
)
6+
from pandas.core.groupby.groupby import GroupBy as GroupBy
27
from pandas.core.groupby.grouper import Grouper as Grouper
8+
9+
__all__ = [
10+
"DataFrameGroupBy",
11+
"NamedAgg",
12+
"SeriesGroupBy",
13+
"GroupBy",
14+
"Grouper",
15+
]

pandas-stubs/core/groupby/base.pyi

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# from pandas.core.dtypes.common import is_list_like as is_list_like, is_scalar as is_scalar
21
from collections.abc import Hashable
32
import dataclasses
43

Original file line numberDiff line numberDiff line change
@@ -1,6 +0,0 @@
1-
from pandas.core.arrays.categorical import ( # , CategoricalDtype as CategoricalDtype
2-
Categorical,
3-
)
4-
5-
def recode_for_groupby(c: Categorical, sort: bool, observed: bool): ...
6-
def recode_from_groupby(c: Categorical, sort: bool, ci): ...

0 commit comments

Comments
 (0)