Skip to content

Commit 218c9e8

Browse files
rhshadrach authored and pmhatre1 committed
ENH: Enable fillna(value=None) (pandas-dev#58085)
* ENH: Enable fillna(value=None) * fixups * fixup * Improve docs
1 parent 86f6fe7 commit 218c9e8

File tree

14 files changed

+155
-104
lines changed

14 files changed

+155
-104
lines changed

doc/source/user_guide/missing_data.rst

+21
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,27 @@ Replace NA with a scalar value
386386
df
387387
df.fillna(0)
388388
389+
When the data has object dtype, you can control what type of NA values are present.
390+
391+
.. ipython:: python
392+
393+
df = pd.DataFrame({"a": [pd.NA, np.nan, None]}, dtype=object)
394+
df
395+
df.fillna(None)
396+
df.fillna(np.nan)
397+
df.fillna(pd.NA)
398+
399+
However, when the dtype is not object, these will all be replaced with the proper NA value for the dtype.
400+
401+
.. ipython:: python
402+
403+
data = {"np": [1.0, np.nan, np.nan, 2], "arrow": pd.array([1.0, pd.NA, pd.NA, 2], dtype="float64[pyarrow]")}
404+
df = pd.DataFrame(data)
405+
df
406+
df.fillna(None)
407+
df.fillna(np.nan)
408+
df.fillna(pd.NA)
409+
389410
Fill gaps forward or backward
390411

391412
.. ipython:: python

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Other enhancements
3737
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
3838
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
3939
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
40+
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
4041

4142
.. ---------------------------------------------------------------------------
4243
.. _whatsnew_300.notable_bug_fixes:

pandas/core/arrays/_mixins.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def _pad_or_backfill(
328328
return new_values
329329

330330
@doc(ExtensionArray.fillna)
331-
def fillna(self, value=None, limit: int | None = None, copy: bool = True) -> Self:
331+
def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
332332
mask = self.isna()
333333
# error: Argument 2 to "check_value_size" has incompatible type
334334
# "ExtensionArray"; expected "ndarray"
@@ -347,8 +347,7 @@ def fillna(self, value=None, limit: int | None = None, copy: bool = True) -> Sel
347347
new_values[mask] = value
348348
else:
349349
# We validate the fill_value even if there is nothing to fill
350-
if value is not None:
351-
self._validate_setitem_value(value)
350+
self._validate_setitem_value(value)
352351

353352
if not copy:
354353
new_values = self[:]

pandas/core/arrays/arrow/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1077,7 +1077,7 @@ def _pad_or_backfill(
10771077
@doc(ExtensionArray.fillna)
10781078
def fillna(
10791079
self,
1080-
value: object | ArrayLike | None = None,
1080+
value: object | ArrayLike,
10811081
limit: int | None = None,
10821082
copy: bool = True,
10831083
) -> Self:

pandas/core/arrays/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -892,7 +892,7 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr
892892
indexer = obj.argsort()[-1]
893893
return obj[indexer]
894894

895-
def fillna(self, value=None, limit: int | None = None, copy: bool = True) -> Self:
895+
def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
896896
"""
897897
Fill NA/NaN values using the specified method.
898898

pandas/core/arrays/masked.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def _pad_or_backfill(
236236
return new_values
237237

238238
@doc(ExtensionArray.fillna)
239-
def fillna(self, value=None, limit: int | None = None, copy: bool = True) -> Self:
239+
def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
240240
mask = self._mask
241241

242242
value = missing.check_value_size(value, mask, len(self))

pandas/core/arrays/sparse/array.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,7 @@ def isna(self) -> Self: # type: ignore[override]
706706

707707
def fillna(
708708
self,
709-
value=None,
709+
value,
710710
limit: int | None = None,
711711
copy: bool = True,
712712
) -> Self:
@@ -736,8 +736,6 @@ def fillna(
736736
When ``self.fill_value`` is not NA, the result dtype will be
737737
``self.dtype``. Again, this preserves the amount of memory used.
738738
"""
739-
if value is None:
740-
raise ValueError("Must specify 'value'.")
741739
new_values = np.where(isna(self.sp_values), value, self.sp_values)
742740

743741
if self._null_fill_value:

pandas/core/generic.py

+88-91
Original file line numberDiff line numberDiff line change
@@ -6752,7 +6752,7 @@ def _pad_or_backfill(
67526752
@overload
67536753
def fillna(
67546754
self,
6755-
value: Hashable | Mapping | Series | DataFrame = ...,
6755+
value: Hashable | Mapping | Series | DataFrame,
67566756
*,
67576757
axis: Axis | None = ...,
67586758
inplace: Literal[False] = ...,
@@ -6762,7 +6762,7 @@ def fillna(
67626762
@overload
67636763
def fillna(
67646764
self,
6765-
value: Hashable | Mapping | Series | DataFrame = ...,
6765+
value: Hashable | Mapping | Series | DataFrame,
67666766
*,
67676767
axis: Axis | None = ...,
67686768
inplace: Literal[True],
@@ -6772,7 +6772,7 @@ def fillna(
67726772
@overload
67736773
def fillna(
67746774
self,
6775-
value: Hashable | Mapping | Series | DataFrame = ...,
6775+
value: Hashable | Mapping | Series | DataFrame,
67766776
*,
67776777
axis: Axis | None = ...,
67786778
inplace: bool = ...,
@@ -6786,7 +6786,7 @@ def fillna(
67866786
)
67876787
def fillna(
67886788
self,
6789-
value: Hashable | Mapping | Series | DataFrame | None = None,
6789+
value: Hashable | Mapping | Series | DataFrame,
67906790
*,
67916791
axis: Axis | None = None,
67926792
inplace: bool = False,
@@ -6827,6 +6827,12 @@ def fillna(
68276827
reindex : Conform object to new index.
68286828
asfreq : Convert TimeSeries to specified frequency.
68296829
6830+
Notes
6831+
-----
6832+
For non-object dtype, ``value=None`` will use the NA value of the dtype.
6833+
See more details in the :ref:`Filling missing data<missing_data.fillna>`
6834+
section.
6835+
68306836
Examples
68316837
--------
68326838
>>> df = pd.DataFrame(
@@ -6909,101 +6915,92 @@ def fillna(
69096915
axis = 0
69106916
axis = self._get_axis_number(axis)
69116917

6912-
if value is None:
6913-
raise ValueError("Must specify a fill 'value'.")
6914-
else:
6915-
if self.ndim == 1:
6916-
if isinstance(value, (dict, ABCSeries)):
6917-
if not len(value):
6918-
# test_fillna_nonscalar
6919-
if inplace:
6920-
return None
6921-
return self.copy(deep=False)
6922-
from pandas import Series
6923-
6924-
value = Series(value)
6925-
value = value.reindex(self.index)
6926-
value = value._values
6927-
elif not is_list_like(value):
6928-
pass
6929-
else:
6930-
raise TypeError(
6931-
'"value" parameter must be a scalar, dict '
6932-
"or Series, but you passed a "
6933-
f'"{type(value).__name__}"'
6934-
)
6918+
if self.ndim == 1:
6919+
if isinstance(value, (dict, ABCSeries)):
6920+
if not len(value):
6921+
# test_fillna_nonscalar
6922+
if inplace:
6923+
return None
6924+
return self.copy(deep=False)
6925+
from pandas import Series
6926+
6927+
value = Series(value)
6928+
value = value.reindex(self.index)
6929+
value = value._values
6930+
elif not is_list_like(value):
6931+
pass
6932+
else:
6933+
raise TypeError(
6934+
'"value" parameter must be a scalar, dict '
6935+
"or Series, but you passed a "
6936+
f'"{type(value).__name__}"'
6937+
)
69356938

6936-
new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)
6939+
new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)
69376940

6938-
elif isinstance(value, (dict, ABCSeries)):
6939-
if axis == 1:
6940-
raise NotImplementedError(
6941-
"Currently only can fill "
6942-
"with dict/Series column "
6943-
"by column"
6944-
)
6945-
result = self if inplace else self.copy(deep=False)
6946-
for k, v in value.items():
6947-
if k not in result:
6948-
continue
6941+
elif isinstance(value, (dict, ABCSeries)):
6942+
if axis == 1:
6943+
raise NotImplementedError(
6944+
"Currently only can fill with dict/Series column by column"
6945+
)
6946+
result = self if inplace else self.copy(deep=False)
6947+
for k, v in value.items():
6948+
if k not in result:
6949+
continue
69496950

6950-
res_k = result[k].fillna(v, limit=limit)
6951+
res_k = result[k].fillna(v, limit=limit)
69516952

6952-
if not inplace:
6953-
result[k] = res_k
6953+
if not inplace:
6954+
result[k] = res_k
6955+
else:
6956+
# We can write into our existing column(s) iff dtype
6957+
# was preserved.
6958+
if isinstance(res_k, ABCSeries):
6959+
# i.e. 'k' only shows up once in self.columns
6960+
if res_k.dtype == result[k].dtype:
6961+
result.loc[:, k] = res_k
6962+
else:
6963+
# Different dtype -> no way to do inplace.
6964+
result[k] = res_k
69546965
else:
6955-
# We can write into our existing column(s) iff dtype
6956-
# was preserved.
6957-
if isinstance(res_k, ABCSeries):
6958-
# i.e. 'k' only shows up once in self.columns
6959-
if res_k.dtype == result[k].dtype:
6960-
result.loc[:, k] = res_k
6966+
# see test_fillna_dict_inplace_nonunique_columns
6967+
locs = result.columns.get_loc(k)
6968+
if isinstance(locs, slice):
6969+
locs = np.arange(self.shape[1])[locs]
6970+
elif isinstance(locs, np.ndarray) and locs.dtype.kind == "b":
6971+
locs = locs.nonzero()[0]
6972+
elif not (
6973+
isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
6974+
):
6975+
# Should never be reached, but let's cover our bases
6976+
raise NotImplementedError(
6977+
"Unexpected get_loc result, please report a bug at "
6978+
"https://github.com/pandas-dev/pandas"
6979+
)
6980+
6981+
for i, loc in enumerate(locs):
6982+
res_loc = res_k.iloc[:, i]
6983+
target = self.iloc[:, loc]
6984+
6985+
if res_loc.dtype == target.dtype:
6986+
result.iloc[:, loc] = res_loc
69616987
else:
6962-
# Different dtype -> no way to do inplace.
6963-
result[k] = res_k
6964-
else:
6965-
# see test_fillna_dict_inplace_nonunique_columns
6966-
locs = result.columns.get_loc(k)
6967-
if isinstance(locs, slice):
6968-
locs = np.arange(self.shape[1])[locs]
6969-
elif (
6970-
isinstance(locs, np.ndarray) and locs.dtype.kind == "b"
6971-
):
6972-
locs = locs.nonzero()[0]
6973-
elif not (
6974-
isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
6975-
):
6976-
# Should never be reached, but let's cover our bases
6977-
raise NotImplementedError(
6978-
"Unexpected get_loc result, please report a bug at "
6979-
"https://github.com/pandas-dev/pandas"
6980-
)
6981-
6982-
for i, loc in enumerate(locs):
6983-
res_loc = res_k.iloc[:, i]
6984-
target = self.iloc[:, loc]
6985-
6986-
if res_loc.dtype == target.dtype:
6987-
result.iloc[:, loc] = res_loc
6988-
else:
6989-
result.isetitem(loc, res_loc)
6990-
if inplace:
6991-
return self._update_inplace(result)
6992-
else:
6993-
return result
6988+
result.isetitem(loc, res_loc)
6989+
if inplace:
6990+
return self._update_inplace(result)
6991+
else:
6992+
return result
69946993

6995-
elif not is_list_like(value):
6996-
if axis == 1:
6997-
result = self.T.fillna(value=value, limit=limit).T
6998-
new_data = result._mgr
6999-
else:
7000-
new_data = self._mgr.fillna(
7001-
value=value, limit=limit, inplace=inplace
7002-
)
7003-
elif isinstance(value, ABCDataFrame) and self.ndim == 2:
7004-
new_data = self.where(self.notna(), value)._mgr
6994+
elif not is_list_like(value):
6995+
if axis == 1:
6996+
result = self.T.fillna(value=value, limit=limit).T
6997+
new_data = result._mgr
70056998
else:
7006-
raise ValueError(f"invalid fill value with a {type(value)}")
6999+
new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)
7000+
elif isinstance(value, ABCDataFrame) and self.ndim == 2:
7001+
new_data = self.where(self.notna(), value)._mgr
7002+
else:
7003+
raise ValueError(f"invalid fill value with a {type(value)}")
70077004

70087005
result = self._constructor_from_mgr(new_data, axes=new_data.axes)
70097006
if inplace:

pandas/core/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2543,7 +2543,7 @@ def notna(self) -> npt.NDArray[np.bool_]:
25432543

25442544
notnull = notna
25452545

2546-
def fillna(self, value=None):
2546+
def fillna(self, value):
25472547
"""
25482548
Fill NA/NaN values with the specified value.
25492549

pandas/core/indexes/multi.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1675,7 +1675,7 @@ def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
16751675
# (previously declared in base class "IndexOpsMixin")
16761676
_duplicated = duplicated # type: ignore[misc]
16771677

1678-
def fillna(self, value=None, downcast=None):
1678+
def fillna(self, value, downcast=None):
16791679
"""
16801680
fillna is not implemented for MultiIndex
16811681
"""

pandas/tests/extension/base/missing.py

+6
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ def test_fillna_scalar(self, data_missing):
6868
expected = data_missing.fillna(valid)
6969
tm.assert_extension_array_equal(result, expected)
7070

71+
def test_fillna_with_none(self, data_missing):
72+
# GH#57723
73+
result = data_missing.fillna(None)
74+
expected = data_missing
75+
tm.assert_extension_array_equal(result, expected)
76+
7177
def test_fillna_limit_pad(self, data_missing):
7278
arr = data_missing.take([1, 0, 0, 0, 1])
7379
result = pd.Series(arr).ffill(limit=2)

pandas/tests/extension/decimal/test_decimal.py

+8
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,14 @@ def test_fillna_series(self, data_missing):
144144
):
145145
super().test_fillna_series(data_missing)
146146

147+
def test_fillna_with_none(self, data_missing):
148+
# GH#57723
149+
# EAs that don't have special logic for None will raise, unlike pandas'
150+
# which interpret None as the NA value for the dtype.
151+
msg = "conversion from NoneType to Decimal is not supported"
152+
with pytest.raises(TypeError, match=msg):
153+
super().test_fillna_with_none(data_missing)
154+
147155
@pytest.mark.parametrize("dropna", [True, False])
148156
def test_value_counts(self, all_data, dropna):
149157
all_data = all_data[:10]

pandas/tests/extension/json/test_json.py

+7
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,13 @@ def test_fillna_frame(self):
149149
"""We treat dictionaries as a mapping in fillna, not a scalar."""
150150
super().test_fillna_frame()
151151

152+
def test_fillna_with_none(self, data_missing):
153+
# GH#57723
154+
# EAs that don't have special logic for None will raise, unlike pandas'
155+
# which interpret None as the NA value for the dtype.
156+
with pytest.raises(AssertionError):
157+
super().test_fillna_with_none(data_missing)
158+
152159
@pytest.mark.parametrize(
153160
"limit_area, input_ilocs, expected_ilocs",
154161
[

0 commit comments

Comments
 (0)