
Rework groupby and resample core modules #848


Merged
merged 23 commits on Feb 6, 2024
Changes from 2 commits
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
--exit-non-zero-on-fix,
--target-version, py39,
--extend-select, "PYI,UP,RUF100",
--ignore, "E501,E731,F841,PYI042",
--ignore, "E501,E731,F841,PYI042,PYI053",
--per-file-ignores, "_*.pyi:PYI001",
--fix
]
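
The diff adds PYI053 to the ruff ignore list. Assuming PYI053 is ruff's string-or-bytes-too-long check from flake8-pyi (string and bytes literals longer than 50 characters in stub files), a hypothetical stub line of the kind it would otherwise flag, shown only for illustration:

# hypothetical example, not from this PR: with PYI053 enabled, a literal this
# long would be flagged and have to be shortened to `...`
_LONG_MSG: str = "this deprecation message is comfortably longer than fifty characters"
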
5 changes: 4 additions & 1 deletion pandas-stubs/_libs/properties.pyi
@@ -5,7 +5,10 @@ class CachedProperty:
def __get__(self, obj, typ): ...
def __set__(self, obj, value) -> None: ...

cache_readonly: CachedProperty = ...
# note: this is a lie to make type checkers happy (they special
# case property). cache_readonly uses attribute names similar to
# property (fget) but it does not provide fset and fdel.
cache_readonly = property

class AxisProperty:
def __init__(self, axis: int = ..., doc: str = ...) -> None: ...
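
For context, a minimal sketch (not part of the diff, using a hypothetical class) of what the `cache_readonly = property` alias buys: because type checkers special-case `property`, a method decorated with `cache_readonly` is seen as a read-only attribute with the method's return type, even though the runtime object is a CachedProperty.

from pandas.util._decorators import cache_readonly

class _Example:  # hypothetical class, not part of pandas
    def __init__(self, values: list[int]) -> None:
        self._values = values

    @cache_readonly
    def total(self) -> int:
        # computed once at runtime, then cached on the instance
        return sum(self._values)

t: int = _Example([1, 2, 3]).total  # accepted because cache_readonly is typed as property
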
17 changes: 17 additions & 0 deletions pandas-stubs/_typing.pyi
@@ -48,6 +48,8 @@ from pandas.core.dtypes.dtypes import (

from pandas.io.formats.format import EngFormatter

Incomplete: TypeAlias = Any

ArrayLike: TypeAlias = ExtensionArray | np.ndarray
AnyArrayLike: TypeAlias = Index | Series | np.ndarray
PythonScalar: TypeAlias = str | bool | complex
@@ -80,6 +82,10 @@ class FulldatetimeDict(YearMonthDayDict, total=False):
us: DatetimeDictArg
ns: DatetimeDictArg

CorrelationMethod: TypeAlias = (
Literal["pearson", "kendall", "spearman"]
| Callable[[np.ndarray, np.ndarray], float]
)
# dtypes
NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | object]
Dtype: TypeAlias = ExtensionDtype | NpDtype
@@ -444,6 +450,7 @@ class SequenceNotStr(Protocol[_T_co]):
IndexLabel: TypeAlias = Hashable | Sequence[Hashable]
Label: TypeAlias = Hashable | None
Level: TypeAlias = Hashable | int
Shape: TypeAlias = tuple[int, ...]
Suffixes: TypeAlias = tuple[str | None, str | None]
Ordered: TypeAlias = bool | None
JSONSerializable: TypeAlias = PythonScalar | list | dict
@@ -469,8 +476,11 @@ AggFuncTypeSeriesToFrame: TypeAlias = list[AggFuncTypeBase] | AggFuncTypeDictSer
AggFuncTypeFrame: TypeAlias = (
AggFuncTypeBase | list[AggFuncTypeBase] | AggFuncTypeDictFrame
)
AggFuncTypeDict: TypeAlias = AggFuncTypeDictSeries | AggFuncTypeDictFrame
AggFuncType: TypeAlias = AggFuncTypeBase | list[AggFuncTypeBase] | AggFuncTypeDict

num: TypeAlias = complex
AxisInt: TypeAlias = int
AxisIndex: TypeAlias = Literal["index", 0]
AxisColumn: TypeAlias = Literal["columns", 1]
Axis: TypeAlias = AxisIndex | AxisColumn
@@ -563,6 +573,9 @@ IndexT = TypeVar("IndexT", bound=Index)
IntervalT = TypeVar("IntervalT", bound=Interval)
IntervalClosedType: TypeAlias = Literal["left", "right", "both", "neither"]

ScalarIndexer: TypeAlias = int | np.integer
SequenceIndexer: TypeAlias = slice | list[int] | np.ndarray
PositionalIndexer: TypeAlias = ScalarIndexer | SequenceIndexer
TakeIndexer: TypeAlias = Sequence[int] | Sequence[np.integer] | npt.NDArray[np.integer]

IgnoreRaiseCoerce: TypeAlias = Literal["ignore", "raise", "coerce"]
@@ -758,5 +771,9 @@ RandomState: TypeAlias = (
| np.random.BitGenerator
| np.random.RandomState
)
Frequency: TypeAlias = str | BaseOffset
TimeGrouperOrigin: TypeAlias = (
Timestamp | Literal["epoch", "start", "start_day", "end", "end_day"]
)

__all__ = ["npt", "type_t"]
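
A short usage sketch (not part of the diff, with made-up data) of the public API the new `CorrelationMethod` and `TimeGrouperOrigin` aliases describe:

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [2.0, 1.0, 4.0]})

# CorrelationMethod: a literal name or a callable taking two ndarrays and returning a float
df.corr(method="spearman")
df.corr(method=lambda x, y: float(np.corrcoef(x, y)[0, 1]))

# TimeGrouperOrigin: resample(origin=...) accepts a Timestamp or one of the listed literals
ts = pd.Series(range(6), index=pd.date_range("2024-01-01 00:13", periods=6, freq="h"))
ts.resample("2h", origin="start_day").sum()
ts.resample("2h", origin=pd.Timestamp("2024-01-01")).sum()
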
242 changes: 242 additions & 0 deletions pandas-stubs/core/apply.pyi
@@ -0,0 +1,242 @@
import abc
from abc import abstractmethod
from collections.abc import (
Callable,
Hashable,
Iterable,
Iterator,
Sequence,
)
from typing import (
Any,
Generic,
Literal,
TypeVar,
)

import numpy as np
from pandas import (
DataFrame,
Index,
Series,
)
from pandas.core.generic import NDFrame
from pandas.core.groupby import GroupBy
from pandas.core.resample import Resampler
from pandas.core.window.rolling import BaseWindow

from pandas._libs.lib import NoDefault
from pandas._typing import (
AggFuncType,
AggFuncTypeDict,
Axis,
AxisInt,
Incomplete,
NDFrameT,
npt,
)
from pandas.util._decorators import cache_readonly

_AggObjT = TypeVar("_AggObjT", bound=NDFrame | GroupBy | BaseWindow | Resampler)
_AggGroupByObjT = TypeVar("_AggGroupByObjT", bound=GroupBy | BaseWindow | Resampler)
_AggResamplerWindowObjT = TypeVar(
"_AggResamplerWindowObjT", bound=BaseWindow | Resampler
)

ResType = dict[int, Any] # noqa: PYI026

def frame_apply(
obj: DataFrame,
func: AggFuncType,
axis: Axis = 0,
raw: bool = False,
result_type: str | None = None,
by_row: Literal[False, "compat"] = "compat",
args=None,
kwargs=None,
) -> FrameApply: ...

class Apply(Generic[_AggObjT], metaclass=abc.ABCMeta):
axis: AxisInt
obj: _AggObjT
raw: bool
by_row: Literal[False, "compat", "_compat"]
args: Incomplete
kwargs: Incomplete
result_type: Literal["reduce", "broadcast", "expand"] | None
func: AggFuncType
def __init__(
self,
obj: _AggObjT,
func: AggFuncType,
raw: bool,
result_type: Literal["reduce", "broadcast", "expand"] | None,
*,
by_row: Literal[False, "compat", "_compat"] = "compat",
args,
kwargs,
) -> None: ...
@abstractmethod
def apply(self): ...
@abstractmethod
def agg_or_apply_list_like(self, op_name: Literal["agg", "apply"]): ...
@abstractmethod
def agg_or_apply_dict_like(self, op_name: Literal["agg", "apply"]): ...
def agg(self): ...
def transform(self): ...
def transform_dict_like(self, func: AggFuncTypeDict) -> DataFrame: ...
def transform_str_or_callable(self, func: str | Callable[..., Incomplete]): ...
def agg_list_like(self): ...
def compute_list_like(
self,
op_name: Literal["agg", "apply"],
selected_obj: Series | DataFrame,
kwargs: dict[str, Any],
) -> tuple[list[Hashable], list[Any]]: ...
def wrap_results_list_like(
self, keys: list[Hashable], results: list[Series | DataFrame]
): ...
def agg_dict_like(self): ...
def compute_dict_like(
self,
op_name: Literal["agg", "apply"],
selected_obj: Series | DataFrame,
selection: Hashable | Sequence[Hashable],
kwargs: dict[str, Any],
) -> tuple[list[Hashable], list[Any]]: ...
def wrap_results_dict_like(
self,
selected_obj: Series | DataFrame,
result_index: list[Hashable],
result_data: list,
) -> Series | DataFrame: ...
def apply_str(self): ...
def apply_list_or_dict_like(self): ...
def normalize_dictlike_arg(
self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict
) -> AggFuncTypeDict: ...

class NDFrameApply(Apply[NDFrameT], metaclass=abc.ABCMeta):
@property
def index(self) -> Index: ...
@property
def agg_axis(self) -> Index: ...
def agg_or_apply_list_like(self, op_name: Literal["agg", "apply"]): ...
def agg_or_apply_dict_like(self, op_name: Literal["agg", "apply"]): ...

class FrameApply(NDFrameApply[DataFrame]):
def __init__(
self,
obj: DataFrame,
func: AggFuncType,
raw: bool,
result_type: Literal["reduce", "broadcast", "expand"] | None,
*,
by_row: Literal[False, "compat"] = False,
args,
kwargs,
) -> None: ...
@property
@abstractmethod
def result_index(self) -> Index: ...
@property
@abstractmethod
def result_columns(self) -> Index: ...
@property
@abstractmethod
def series_generator(self) -> Iterator[Series]: ...
@abstractmethod
def wrap_results_for_axis(self, results: ResType, res_index: Index): ...
@property
def res_columns(self) -> Index: ...
@property
def columns(self) -> Index: ...
@cache_readonly
def values(self): ...
def apply(self): ...
def agg(self): ...
def apply_empty_result(self): ...
def apply_raw(self): ...
def apply_broadcast(self, target: DataFrame) -> DataFrame: ...
def apply_standard(self): ...
def apply_series_generator(self) -> tuple[ResType, Index]: ...
def wrap_results(self, results: ResType, res_index: Index): ...
def apply_str(self): ...

class FrameRowApply(FrameApply):
@property
def series_generator(self) -> Iterator[Series]: ...
@property
def result_index(self) -> Index: ...
@property
def result_columns(self) -> Index: ...
def wrap_results_for_axis(self, results: ResType, res_index: Index): ...

class FrameColumnApply(FrameApply):
def apply_broadcast(self, target: DataFrame) -> DataFrame: ...
@property
def series_generator(self) -> Iterator[Series]: ...
@property
def result_index(self) -> Index: ...
@property
def result_columns(self) -> Index: ...
def wrap_results_for_axis(self, results: ResType, res_index: Index): ...
def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: ...

class SeriesApply(NDFrameApply[Series]):
by_row: Literal[False, "compat", "_compat"]
convert_dtype: bool
def __init__(
self,
obj: Series,
func: AggFuncType,
*,
convert_dtype: bool | NoDefault = ...,
by_row: Literal[False, "compat", "_compat"] = "compat",
args,
kwargs,
) -> None: ...
def apply(self): ...
def agg(self): ...
def apply_empty_result(self) -> Series: ...
def apply_compat(self): ...
def apply_standard(self): ...

class GroupByApply(Apply[_AggGroupByObjT]):
def __init__(
self, obj: _AggGroupByObjT, func: AggFuncType, *, args, kwargs
) -> None: ...
def apply(self): ...
def transform(self): ...
def agg_or_apply_list_like(self, op_name: Literal["agg", "apply"]): ...
def agg_or_apply_dict_like(self, op_name: Literal["agg", "apply"]): ...

class ResamplerWindowApply(GroupByApply[_AggResamplerWindowObjT]):
def __init__(
self, obj: _AggResamplerWindowObjT, func: AggFuncType, *, args, kwargs
) -> None: ...
def apply(self): ...
def transform(self): ...

def reconstruct_func(
func: AggFuncType | None, **kwargs
) -> tuple[bool, AggFuncType, list[str] | None, npt.NDArray[np.intp] | None]: ...
def is_multi_agg_with_relabel(**kwargs) -> bool: ...
def normalize_keyword_aggregation(
kwargs: dict,
) -> tuple[dict[str, list], list[str], npt.NDArray[np.intp]]: ...
def relabel_result(
result: DataFrame | Series,
func: dict[str, list[Callable | str]],
columns: Iterable[Hashable],
order: Iterable[int],
) -> dict[Hashable, Series]: ...
def reconstruct_and_relabel_result(result, func, **kwargs): ...
def maybe_mangle_lambdas(agg_spec: Any) -> Any: ...
def validate_func_kwargs(
kwargs: dict,
) -> tuple[list[str], list[str | Callable[..., Any]]]: ...
def include_axis(
op_name: Literal["agg", "apply"], colg: Series | DataFrame
) -> bool: ...
def warn_alias_replacement(obj, func: Callable, alias: str) -> None: ...
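
For orientation, a small sketch (not part of the diff, data made up) of the public calls whose internals these stubs describe: keyword and dict-based aggregation are what `reconstruct_func`, `normalize_keyword_aggregation` and the dict-like paths take apart, and `DataFrame.apply` is what `frame_apply` and the `FrameApply` subclasses back.

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})

# keyword (named) aggregation: unpacked into columns, functions and order
df.groupby("key").agg(total=("val", "sum"), largest=("val", "max"))

# dict form (AggFuncTypeDict), handled by the dict-like code paths
df.groupby("key").agg({"val": ["sum", "max"]})

# DataFrame.apply goes through frame_apply
df[["val"]].apply(lambda col: col.sum(), axis=0)
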
17 changes: 15 additions & 2 deletions pandas-stubs/core/base.pyi
@@ -1,7 +1,12 @@
from collections.abc import Iterator
from collections.abc import (
Hashable,
Iterator,
)
from typing import (
Any,
Generic,
Literal,
final,
)

import numpy as np
@@ -19,13 +24,21 @@ from pandas._typing import (
Scalar,
npt,
)
from pandas.util._decorators import cache_readonly

class PandasObject: ...

class NoNewAttributesMixin:
def __setattr__(self, key, value) -> None: ...
def __setattr__(self, key: str, value: Any) -> None: ...

class SelectionMixin(Generic[NDFrameT]):
obj: NDFrameT
exclusions: frozenset[Hashable]
@final
@cache_readonly
def ndim(self) -> int: ...
def __getitem__(self, key): ...
def aggregate(self, func, *args, **kwargs): ...

class IndexOpsMixin(OpsMixin, Generic[S1]):
__array_priority__: int = ...
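
A brief sketch (not part of the diff, data made up) of the selection behaviour that `SelectionMixin` models, using public API only:

import pandas as pd

df = pd.DataFrame({"key": ["a", "b", "a"], "x": [1, 2, 3], "y": [4, 5, 6]})

gb = df.groupby("key")     # GroupBy mixes in SelectionMixin; `obj` is the grouped frame
gb["x"].sum()              # __getitem__ narrows the selection to one column
gb[["x", "y"]].agg("sum")  # aggregate()/agg() is the shared aggregation entry point
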