From 113522fb16f70089b11a6c071b40cc93430e9b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 28 Jun 2023 15:24:34 +0200 Subject: [PATCH 1/2] Examples Resampler.__iter__, groups, indices, get_group, ffill --- ci/code_checks.sh | 7 ---- pandas/core/groupby/groupby.py | 63 ++++++++++++++++++++++++++++++++++ pandas/core/resample.py | 47 +++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 7 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f4a0488581606..8155d398d4d88 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -106,8 +106,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.test \ pandas.NaT \ pandas.io.formats.style.Styler.to_html \ - pandas.HDFStore.groups \ - pandas.HDFStore.walk \ pandas.read_feather \ pandas.DataFrame.to_feather \ pandas.read_parquet \ @@ -120,11 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.io.stata.StataReader.value_labels \ pandas.io.stata.StataReader.variable_labels \ pandas.io.stata.StataWriter.write_file \ - pandas.core.resample.Resampler.__iter__ \ - pandas.core.resample.Resampler.groups \ - pandas.core.resample.Resampler.indices \ - pandas.core.resample.Resampler.get_group \ - pandas.core.resample.Resampler.ffill \ pandas.core.resample.Resampler.asfreq \ pandas.core.resample.Resampler.count \ pandas.core.resample.Resampler.nunique \ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1a17fef071a2f..30f5f21f03658 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -753,6 +753,19 @@ def groups(self) -> dict[Hashable, np.ndarray]: 2 7 8 9 >>> df.groupby(by=["a"]).groups {1: [0, 1], 7: [2]} + + For Resampler: + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... 
['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> ser.resample('M').groups + {Timestamp('2023-01-31 00:00:00'): 2, Timestamp('2023-02-28 00:00:00'): 4} """ return self.grouper.groups @@ -794,6 +807,20 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: eagle 7 8 9 >>> df.groupby(by=["a"]).indices {1: array([0, 1]), 7: array([2])} + + For Resampler: + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> ser.resample('M').indices + defaultdict(<class 'list'>, {Timestamp('2023-01-31 00:00:00'): [0, 1], + Timestamp('2023-02-28 00:00:00'): [2, 3]}) """ return self.grouper.indices @@ -965,6 +992,21 @@ def get_group(self, name, obj=None) -> DataFrame | Series: a b c owl 1 2 3 toucan 1 5 6 + + For Resampler: + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> ser.resample('M').get_group('2023-01-31') + 2023-01-01 1 + 2023-01-15 2 + dtype: int64 """ inds = self._get_index(name) if not len(inds): @@ -1032,6 +1074,27 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: (7,) a b c 2 7 8 9 + + For Resampler: + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... 
print(f'{x}\\n{y}\\n') + 2023-01-31 00:00:00 + 2023-01-01 1 + 2023-01-15 2 + dtype: int64 + 2023-02-28 00:00:00 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 """ keys = self.keys level = self.level diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 9566a2f113b36..0cd30fdba16c1 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -503,6 +503,53 @@ def ffill(self, limit: int | None = None): -------- Series.fillna: Fill NA/NaN values using the specified method. DataFrame.fillna: Fill NA/NaN values using the specified method. + + Examples + -------- + Here we only create a ``Series``. + + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + + Example for ``ffill`` with downsampling (we have fewer dates after resampling): + + >>> ser.resample('M').ffill() + 2023-01-31 2 + 2023-02-28 4 + Freq: M, dtype: int64 + + Example for ``ffill`` with upsampling (we have more dates after resampling): + + >>> ser.resample('W').ffill() + 2023-01-01 1 + 2023-01-08 1 + 2023-01-15 2 + 2023-01-22 2 + 2023-01-29 2 + 2023-02-05 3 + 2023-02-12 3 + 2023-02-19 4 + Freq: W-SUN, dtype: int64 + + With upsampling and limiting (we have more dates after resampling but + we cannot fill them all because of the given limit): + + >>> ser.resample('W').ffill(limit=1) + 2023-01-01 1.0 + 2023-01-08 1.0 + 2023-01-15 2.0 + 2023-01-22 2.0 + 2023-01-29 NaN + 2023-02-05 3.0 + 2023-02-12 NaN + 2023-02-19 4.0 + Freq: W-SUN, dtype: float64 """ return self._upsample("ffill", limit=limit) From 5a778deec34d928c83c9723c25416805f6b398eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 28 Jun 2023 18:00:29 +0200 Subject: [PATCH 2/2] Improved explanation for ffill --- pandas/core/resample.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/resample.py 
b/pandas/core/resample.py index 0cd30fdba16c1..9391696d1925a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -524,7 +524,8 @@ def ffill(self, limit: int | None = None): 2023-02-28 4 Freq: M, dtype: int64 - Example for ``ffill`` with upsampling (we have more dates after resampling): + Example for ``ffill`` with upsampling (fill the new dates with + the previous value): >>> ser.resample('W').ffill() 2023-01-01 1 @@ -537,8 +538,8 @@ def ffill(self, limit: int | None = None): 2023-02-19 4 Freq: W-SUN, dtype: int64 - With upsampling and limiting (we have more dates after resampling but - we cannot fill them all because of the given limit): + With upsampling and limiting (only fill the first new date with the + previous value): >>> ser.resample('W').ffill(limit=1) 2023-01-01 1.0