diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f4a0488581606..8155d398d4d88 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -106,8 +106,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.test \ pandas.NaT \ pandas.io.formats.style.Styler.to_html \ - pandas.HDFStore.groups \ - pandas.HDFStore.walk \ pandas.read_feather \ pandas.DataFrame.to_feather \ pandas.read_parquet \ @@ -120,11 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.io.stata.StataReader.value_labels \ pandas.io.stata.StataReader.variable_labels \ pandas.io.stata.StataWriter.write_file \ - pandas.core.resample.Resampler.__iter__ \ - pandas.core.resample.Resampler.groups \ - pandas.core.resample.Resampler.indices \ - pandas.core.resample.Resampler.get_group \ - pandas.core.resample.Resampler.ffill \ pandas.core.resample.Resampler.asfreq \ pandas.core.resample.Resampler.count \ pandas.core.resample.Resampler.nunique \ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1a17fef071a2f..30f5f21f03658 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -753,6 +753,19 @@ def groups(self) -> dict[Hashable, np.ndarray]: 2 7 8 9 >>> df.groupby(by=["a"]).groups {1: [0, 1], 7: [2]} + + For Resampler: + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> ser.resample('M').groups + {Timestamp('2023-01-31 00:00:00'): 2, Timestamp('2023-02-28 00:00:00'): 4} """ return self.grouper.groups @@ -794,6 +807,20 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: eagle 7 8 9 >>> df.groupby(by=["a"]).indices {1: array([0, 1]), 7: array([2])} + + For Resampler: + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... 
['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> ser.resample('M').indices + defaultdict(<class 'list'>, {Timestamp('2023-01-31 00:00:00'): [0, 1], + Timestamp('2023-02-28 00:00:00'): [2, 3]}) """ return self.grouper.indices @@ -965,6 +992,21 @@ def get_group(self, name, obj=None) -> DataFrame | Series: a b c owl 1 2 3 toucan 1 5 6 + + For Resampler: + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> ser.resample('M').get_group('2023-01-31') + 2023-01-01 1 + 2023-01-15 2 + dtype: int64 """ inds = self._get_index(name) if not len(inds): @@ -1032,6 +1074,27 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: (7,) a b c 2 7 8 9 + + For Resampler: + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> for x, y in ser.resample('M'): + ... print(f'{x}\\n{y}\\n') + 2023-01-31 00:00:00 + 2023-01-01 1 + 2023-01-15 2 + dtype: int64 + 2023-02-28 00:00:00 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 """ keys = self.keys level = self.level diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 9566a2f113b36..9391696d1925a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -503,6 +503,54 @@ def ffill(self, limit: int | None = None): -------- Series.fillna: Fill NA/NaN values using the specified method. DataFrame.fillna: Fill NA/NaN values using the specified method. + + Examples + -------- + Here we only create a ``Series``. + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... 
['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + + Example for ``ffill`` with downsampling (we have fewer dates after resampling): + + >>> ser.resample('M').ffill() + 2023-01-31 2 + 2023-02-28 4 + Freq: M, dtype: int64 + + Example for ``ffill`` with upsampling (fill the new dates with + the previous value): + + >>> ser.resample('W').ffill() + 2023-01-01 1 + 2023-01-08 1 + 2023-01-15 2 + 2023-01-22 2 + 2023-01-29 2 + 2023-02-05 3 + 2023-02-12 3 + 2023-02-19 4 + Freq: W-SUN, dtype: int64 + + With upsampling and limiting (only fill the first new date with the + previous value): + + >>> ser.resample('W').ffill(limit=1) + 2023-01-01 1.0 + 2023-01-08 1.0 + 2023-01-15 2.0 + 2023-01-22 2.0 + 2023-01-29 NaN + 2023-02-05 3.0 + 2023-02-12 NaN + 2023-02-19 4.0 + Freq: W-SUN, dtype: float64 """ return self._upsample("ffill", limit=limit)