Skip to content

Commit 740c311

Browse files
rhshadrach authored and cbpygit committed
ENH: Add limit_area to ffill/bfill (pandas-dev#56531)
* ENH: Add limit_area to ffill/bfill
* Fix bfill default
* Update groupby API test
* Update groupby API test
1 parent f64085e commit 740c311

File tree

5 files changed

+209
-3
lines changed

5 files changed

+209
-3
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -229,6 +229,7 @@ Other enhancements
229229
- :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
230230
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
231231
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
232+
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area`` (:issue:`56492`)
232233
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
233234
- DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
234235
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)

pandas/core/generic.py

+36 -1
Original file line number | Diff line number | Diff line change
@@ -7060,6 +7060,7 @@ def _pad_or_backfill(
70607060
axis: None | Axis = None,
70617061
inplace: bool_t = False,
70627062
limit: None | int = None,
7063+
limit_area: Literal["inside", "outside"] | None = None,
70637064
downcast: dict | None = None,
70647065
):
70657066
if axis is None:
@@ -7073,14 +7074,17 @@ def _pad_or_backfill(
70737074
# in all axis=1 cases, and remove axis kward from mgr.pad_or_backfill.
70747075
if inplace:
70757076
raise NotImplementedError()
7076-
result = self.T._pad_or_backfill(method=method, limit=limit).T
7077+
result = self.T._pad_or_backfill(
7078+
method=method, limit=limit, limit_area=limit_area
7079+
).T
70777080

70787081
return result
70797082

70807083
new_mgr = self._mgr.pad_or_backfill(
70817084
method=method,
70827085
axis=self._get_block_manager_axis(axis),
70837086
limit=limit,
7087+
limit_area=limit_area,
70847088
inplace=inplace,
70857089
downcast=downcast,
70867090
)
@@ -7440,6 +7444,7 @@ def ffill(
74407444
axis: None | Axis = ...,
74417445
inplace: Literal[False] = ...,
74427446
limit: None | int = ...,
7447+
limit_area: Literal["inside", "outside"] | None = ...,
74437448
downcast: dict | None | lib.NoDefault = ...,
74447449
) -> Self:
74457450
...
@@ -7451,6 +7456,7 @@ def ffill(
74517456
axis: None | Axis = ...,
74527457
inplace: Literal[True],
74537458
limit: None | int = ...,
7459+
limit_area: Literal["inside", "outside"] | None = ...,
74547460
downcast: dict | None | lib.NoDefault = ...,
74557461
) -> None:
74567462
...
@@ -7462,6 +7468,7 @@ def ffill(
74627468
axis: None | Axis = ...,
74637469
inplace: bool_t = ...,
74647470
limit: None | int = ...,
7471+
limit_area: Literal["inside", "outside"] | None = ...,
74657472
downcast: dict | None | lib.NoDefault = ...,
74667473
) -> Self | None:
74677474
...
@@ -7477,6 +7484,7 @@ def ffill(
74777484
axis: None | Axis = None,
74787485
inplace: bool_t = False,
74797486
limit: None | int = None,
7487+
limit_area: Literal["inside", "outside"] | None = None,
74807488
downcast: dict | None | lib.NoDefault = lib.no_default,
74817489
) -> Self | None:
74827490
"""
@@ -7498,6 +7506,17 @@ def ffill(
74987506
be partially filled. If method is not specified, this is the
74997507
maximum number of entries along the entire axis where NaNs will be
75007508
filled. Must be greater than 0 if not None.
7509+
limit_area : {{`None`, 'inside', 'outside'}}, default None
7510+
If limit is specified, consecutive NaNs will be filled with this
7511+
restriction.
7512+
7513+
* ``None``: No fill restriction.
7514+
* 'inside': Only fill NaNs surrounded by valid values
7515+
(interpolate).
7516+
* 'outside': Only fill NaNs outside valid values (extrapolate).
7517+
7518+
.. versionadded:: 2.2.0
7519+
75017520
downcast : dict, default is None
75027521
A dict of item->dtype of what to downcast if possible,
75037522
or the string 'infer' which will try to downcast to an appropriate
@@ -7569,6 +7588,7 @@ def ffill(
75697588
axis=axis,
75707589
inplace=inplace,
75717590
limit=limit,
7591+
limit_area=limit_area,
75727592
# error: Argument "downcast" to "_fillna_with_method" of "NDFrame"
75737593
# has incompatible type "Union[Dict[Any, Any], None,
75747594
# Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]"
@@ -7616,6 +7636,7 @@ def bfill(
76167636
axis: None | Axis = ...,
76177637
inplace: Literal[False] = ...,
76187638
limit: None | int = ...,
7639+
limit_area: Literal["inside", "outside"] | None = ...,
76197640
downcast: dict | None | lib.NoDefault = ...,
76207641
) -> Self:
76217642
...
@@ -7638,6 +7659,7 @@ def bfill(
76387659
axis: None | Axis = ...,
76397660
inplace: bool_t = ...,
76407661
limit: None | int = ...,
7662+
limit_area: Literal["inside", "outside"] | None = ...,
76417663
downcast: dict | None | lib.NoDefault = ...,
76427664
) -> Self | None:
76437665
...
@@ -7653,6 +7675,7 @@ def bfill(
76537675
axis: None | Axis = None,
76547676
inplace: bool_t = False,
76557677
limit: None | int = None,
7678+
limit_area: Literal["inside", "outside"] | None = None,
76567679
downcast: dict | None | lib.NoDefault = lib.no_default,
76577680
) -> Self | None:
76587681
"""
@@ -7674,6 +7697,17 @@ def bfill(
76747697
be partially filled. If method is not specified, this is the
76757698
maximum number of entries along the entire axis where NaNs will be
76767699
filled. Must be greater than 0 if not None.
7700+
limit_area : {{`None`, 'inside', 'outside'}}, default None
7701+
If limit is specified, consecutive NaNs will be filled with this
7702+
restriction.
7703+
7704+
* ``None``: No fill restriction.
7705+
* 'inside': Only fill NaNs surrounded by valid values
7706+
(interpolate).
7707+
* 'outside': Only fill NaNs outside valid values (extrapolate).
7708+
7709+
.. versionadded:: 2.2.0
7710+
76777711
downcast : dict, default is None
76787712
A dict of item->dtype of what to downcast if possible,
76797713
or the string 'infer' which will try to downcast to an appropriate
@@ -7756,6 +7790,7 @@ def bfill(
77567790
axis=axis,
77577791
inplace=inplace,
77587792
limit=limit,
7793+
limit_area=limit_area,
77597794
# error: Argument "downcast" to "_fillna_with_method" of "NDFrame"
77607795
# has incompatible type "Union[Dict[Any, Any], None,
77617796
# Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]"

pandas/tests/frame/methods/test_fillna.py

+97
Original file line number | Diff line number | Diff line change
@@ -857,3 +857,100 @@ def test_pad_backfill_deprecated(func):
857857
df = DataFrame({"a": [1, 2, 3]})
858858
with tm.assert_produces_warning(FutureWarning):
859859
getattr(df, func)()
860+
861+
862+
@pytest.mark.parametrize(
863+
"data, expected_data, method, kwargs",
864+
(
865+
pytest.param(
866+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
867+
[np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
868+
"ffill",
869+
{"limit_area": "inside"},
870+
marks=pytest.mark.xfail(
871+
reason="GH#41813 - limit_area applied to the wrong axis"
872+
),
873+
),
874+
pytest.param(
875+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
876+
[np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
877+
"ffill",
878+
{"limit_area": "inside", "limit": 1},
879+
marks=pytest.mark.xfail(
880+
reason="GH#41813 - limit_area applied to the wrong axis"
881+
),
882+
),
883+
pytest.param(
884+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
885+
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
886+
"ffill",
887+
{"limit_area": "outside"},
888+
marks=pytest.mark.xfail(
889+
reason="GH#41813 - limit_area applied to the wrong axis"
890+
),
891+
),
892+
pytest.param(
893+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
894+
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
895+
"ffill",
896+
{"limit_area": "outside", "limit": 1},
897+
marks=pytest.mark.xfail(
898+
reason="GH#41813 - limit_area applied to the wrong axis"
899+
),
900+
),
901+
(
902+
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
903+
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
904+
"ffill",
905+
{"limit_area": "outside", "limit": 1},
906+
),
907+
(
908+
range(5),
909+
range(5),
910+
"ffill",
911+
{"limit_area": "outside", "limit": 1},
912+
),
913+
pytest.param(
914+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
915+
[np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
916+
"bfill",
917+
{"limit_area": "inside"},
918+
marks=pytest.mark.xfail(
919+
reason="GH#41813 - limit_area applied to the wrong axis"
920+
),
921+
),
922+
pytest.param(
923+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
924+
[np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
925+
"bfill",
926+
{"limit_area": "inside", "limit": 1},
927+
marks=pytest.mark.xfail(
928+
reason="GH#41813 - limit_area applied to the wrong axis"
929+
),
930+
),
931+
pytest.param(
932+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
933+
[3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
934+
"bfill",
935+
{"limit_area": "outside"},
936+
marks=pytest.mark.xfail(
937+
reason="GH#41813 - limit_area applied to the wrong axis"
938+
),
939+
),
940+
pytest.param(
941+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
942+
[np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
943+
"bfill",
944+
{"limit_area": "outside", "limit": 1},
945+
marks=pytest.mark.xfail(
946+
reason="GH#41813 - limit_area applied to the wrong axis"
947+
),
948+
),
949+
),
950+
)
951+
def test_ffill_bfill_limit_area(data, expected_data, method, kwargs):
952+
# GH#56492
953+
df = DataFrame(data)
954+
expected = DataFrame(expected_data)
955+
result = getattr(df, method)(**kwargs)
956+
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/test_api.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,7 @@ def test_frame_consistency(groupby_func):
183183
elif groupby_func in ("median", "prod", "sem"):
184184
exclude_expected = {"axis", "kwargs", "skipna"}
185185
elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
186-
exclude_expected = {"downcast", "inplace", "axis"}
186+
exclude_expected = {"downcast", "inplace", "axis", "limit_area"}
187187
elif groupby_func in ("cummax", "cummin"):
188188
exclude_expected = {"skipna", "args"}
189189
exclude_result = {"numeric_only"}
@@ -240,7 +240,7 @@ def test_series_consistency(request, groupby_func):
240240
elif groupby_func in ("median", "prod", "sem"):
241241
exclude_expected = {"axis", "kwargs", "skipna"}
242242
elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
243-
exclude_expected = {"downcast", "inplace", "axis"}
243+
exclude_expected = {"downcast", "inplace", "axis", "limit_area"}
244244
elif groupby_func in ("cummax", "cummin"):
245245
exclude_expected = {"skipna", "args"}
246246
exclude_result = {"numeric_only"}

pandas/tests/series/methods/test_fillna.py

+73
Original file line number | Diff line number | Diff line change
@@ -1080,3 +1080,76 @@ def test_pad_backfill_deprecated(self, func):
10801080
ser = Series([1, 2, 3])
10811081
with tm.assert_produces_warning(FutureWarning):
10821082
getattr(ser, func)()
1083+
1084+
1085+
@pytest.mark.parametrize(
1086+
"data, expected_data, method, kwargs",
1087+
(
1088+
(
1089+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
1090+
[np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
1091+
"ffill",
1092+
{"limit_area": "inside"},
1093+
),
1094+
(
1095+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
1096+
[np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
1097+
"ffill",
1098+
{"limit_area": "inside", "limit": 1},
1099+
),
1100+
(
1101+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
1102+
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
1103+
"ffill",
1104+
{"limit_area": "outside"},
1105+
),
1106+
(
1107+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
1108+
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
1109+
"ffill",
1110+
{"limit_area": "outside", "limit": 1},
1111+
),
1112+
(
1113+
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
1114+
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
1115+
"ffill",
1116+
{"limit_area": "outside", "limit": 1},
1117+
),
1118+
(
1119+
range(5),
1120+
range(5),
1121+
"ffill",
1122+
{"limit_area": "outside", "limit": 1},
1123+
),
1124+
(
1125+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
1126+
[np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
1127+
"bfill",
1128+
{"limit_area": "inside"},
1129+
),
1130+
(
1131+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
1132+
[np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
1133+
"bfill",
1134+
{"limit_area": "inside", "limit": 1},
1135+
),
1136+
(
1137+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
1138+
[3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
1139+
"bfill",
1140+
{"limit_area": "outside"},
1141+
),
1142+
(
1143+
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
1144+
[np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
1145+
"bfill",
1146+
{"limit_area": "outside", "limit": 1},
1147+
),
1148+
),
1149+
)
1150+
def test_ffill_bfill_limit_area(data, expected_data, method, kwargs):
1151+
# GH#56492
1152+
s = Series(data)
1153+
expected = Series(expected_data)
1154+
result = getattr(s, method)(**kwargs)
1155+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)