diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 231d40e17c0c0..04c3ff3a42971 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -1477,7 +1477,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.DatetimeIndex.std\ pandas.ExcelFile\ pandas.ExcelFile.parse\ - pandas.Grouper\ pandas.HDFStore.append\ pandas.HDFStore.put\ pandas.Index.get_indexer_for\ @@ -1538,21 +1537,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.api.types.is_float\ pandas.api.types.is_hashable\ pandas.api.types.is_integer\ - pandas.core.groupby.DataFrameGroupBy.cummax\ - pandas.core.groupby.DataFrameGroupBy.cummin\ - pandas.core.groupby.DataFrameGroupBy.cumprod\ - pandas.core.groupby.DataFrameGroupBy.cumsum\ - pandas.core.groupby.DataFrameGroupBy.filter\ - pandas.core.groupby.DataFrameGroupBy.pct_change\ - pandas.core.groupby.DataFrameGroupBy.rolling\ - pandas.core.groupby.SeriesGroupBy.cummax\ - pandas.core.groupby.SeriesGroupBy.cummin\ - pandas.core.groupby.SeriesGroupBy.cumprod\ - pandas.core.groupby.SeriesGroupBy.cumsum\ pandas.core.groupby.SeriesGroupBy.filter\ - pandas.core.groupby.SeriesGroupBy.nunique\ - pandas.core.groupby.SeriesGroupBy.pct_change\ - pandas.core.groupby.SeriesGroupBy.rolling\ pandas.core.resample.Resampler.max\ pandas.core.resample.Resampler.min\ pandas.core.resample.Resampler.quantile\ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index dfe145783c4a3..c90ae4d590b45 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -723,15 +723,22 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame: """ Return number of unique elements in the group. + Parameters + ---------- + dropna : bool, default True + Don't include NaN in the counts. + Returns ------- Series Number of unique values within each group. - Examples + See Also -------- - For SeriesGroupby: + core.resample.Resampler.nunique : Method nunique for Resampler. 
+ Examples + -------- >>> lst = ["a", "a", "b", "b"] >>> ser = pd.Series([1, 2, 3, 3], index=lst) >>> ser @@ -744,25 +751,6 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame: a 2 b 1 dtype: int64 - - For Resampler: - - >>> ser = pd.Series( - ... [1, 2, 3, 3], - ... index=pd.DatetimeIndex( - ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] - ... ), - ... ) - >>> ser - 2023-01-01 1 - 2023-01-15 2 - 2023-02-01 3 - 2023-02-15 3 - dtype: int64 - >>> ser.resample("MS").nunique() - 2023-01-01 2 - 2023-02-01 1 - Freq: MS, dtype: int64 """ ids, ngroups = self._grouper.group_info val = self.obj._values @@ -1942,6 +1930,10 @@ def filter(self, func, dropna: bool = True, *args, **kwargs) -> DataFrame: dropna : bool Drop groups that do not pass the filter. True by default; if False, groups that evaluate False are filled with NaNs. + *args + Additional positional arguments to pass to `func`. + **kwargs + Additional keyword arguments to pass to `func`. Returns ------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 103f7b55c0550..7c9fe0df9d022 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4672,6 +4672,14 @@ def cumprod(self, *args, **kwargs) -> NDFrameT: """ Cumulative product for each group. + Parameters + ---------- + *args : tuple + Positional arguments to be passed to the function. + **kwargs : dict + Additional/specific keyword arguments to be passed to the function, + such as `numeric_only` and `skipna`. + Returns ------- Series or DataFrame @@ -4722,6 +4730,14 @@ def cumsum(self, *args, **kwargs) -> NDFrameT: """ Cumulative sum for each group. + Parameters + ---------- + *args : tuple + Positional arguments to be passed to the function. + **kwargs : dict + Additional/specific keyword arguments to be passed to the function, + such as `numeric_only` and `skipna`. + Returns ------- Series or DataFrame @@ -4776,6 +4792,14 @@ def cummin( """ Cumulative min for each group. 
+ Parameters + ---------- + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + **kwargs : dict, optional + Additional keyword arguments to be passed to the function, such as `skipna`, + to control whether NA/null values are ignored. + Returns ------- Series or DataFrame @@ -4838,6 +4862,14 @@ def cummax( """ Cumulative max for each group. + Parameters + ---------- + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + **kwargs : dict, optional + Additional keyword arguments to be passed to the function, such as `skipna`, + to control whether NA/null values are ignored. + Returns ------- Series or DataFrame @@ -5134,6 +5166,32 @@ def pct_change( """ Calculate pct_change of each value to previous entry in group. + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating percentage change. Comparing with + a period of 1 means adjacent elements are compared, whereas a period + of 2 compares every other element. + + fill_method : FillnaOptions or None, default None + Specifies how to handle missing values after the initial shift + operation necessary for percentage change calculation. Users are + encouraged to handle missing values manually in future versions. + Valid options are: + - A FillnaOptions value ('ffill', 'bfill') for forward or backward filling. + - None to avoid filling. + Note: Usage is discouraged due to impending deprecation. + + limit : int or None, default None + The maximum number of consecutive NA values to fill, based on the chosen + `fill_method`. Address NaN values prior to using `pct_change` as this + parameter is nearing deprecation. + + freq : str, pandas offset object, or None, default None + The frequency increment for time series data (e.g., 'ME' for month-end). + If None, the frequency is inferred from the index. Relevant for time + series data only. 
+ Returns ------- Series or DataFrame diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index b5e4881ffc29c..9179bec86f660 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -68,6 +68,10 @@ class Grouper: Parameters ---------- + *args + Currently unused, reserved for future use. + **kwargs + Dictionary of the keyword arguments to pass to Grouper. key : str, defaults to None Groupby key, which selects the grouping column of the target. level : name/number, defaults to None diff --git a/pandas/core/resample.py b/pandas/core/resample.py index e60dcdb10e653..4147437114b2f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -59,7 +59,6 @@ NDFrame, _shared_docs, ) -from pandas.core.groupby.generic import SeriesGroupBy from pandas.core.groupby.groupby import ( BaseGroupBy, GroupBy, @@ -1358,8 +1357,38 @@ def ohlc(self): return self._downsample("ohlc") @final - @doc(SeriesGroupBy.nunique) def nunique(self): + """ + Return number of unique elements in the group. + + Returns + ------- + Series + Number of unique values within each group. + + See Also + -------- + core.groupby.SeriesGroupBy.nunique : Method nunique for SeriesGroupBy. + + Examples + -------- + >>> ser = pd.Series( + ... [1, 2, 3, 3], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 3 + dtype: int64 + >>> ser.resample("MS").nunique() + 2023-01-01 2 + 2023-02-01 1 + Freq: MS, dtype: int64 + """ return self._downsample("nunique") @final